On 17.3.2020. 13:16, Martin Pieuchot wrote:
> Diff below allows ix(4) and em(4) to establish two MSI-X handlers.
> Multiqueue support is intentionally not included in this diff.
> 


Hi all,

if someone is willing to make em multiqueue i have
em0 at pci6 dev 0 function 0 "Intel 82576" rev 0x01, msix, 1 queue
em2 at pci7 dev 0 function 0 "Intel I210" rev 0x03, msix, 1 queue
em4 at pci11 dev 0 function 0 "Intel I350" rev 0x01, msix, 1 queue
for testing and i will look for 82574 and 82575


Thank you ....



> One handler is for the single queue (index ":0") and one for the
> link interrupt:
> 
>   # vmstat -i |grep ix0
>   irq114/ix0:0                 73178178     3758
>   irq115/ix0                          2        0
> 
> A per-driver toggle allows to switch MSI-X support on/off.  My plan is
> to commit with the switch "off" for the moment.  Switching it to "on"
> should be better done in the beginning of a release cycle.  However it
> is "on" in the diff below for testing purposes.
> 
> The em(4) diff doesn't include support for 82575 nor 82574.  If somebody
> has such hardware and is interested I can lift the necessary bits from
> FreeBSD.  That said em_setup_queues_msix() contains references to 82575
> to help diffing with FreeBSD.
> 
> The ix(4) diff cannot be committed right now as it breaks the build on
> architectures that define pci_intr_map_msix() to (-1).  The compiler
> complains that the arguments of the macro are unused.
> 
>   /sys/dev/pci/if_ix.c:1789:21: error: unused variable 'dummy'
>         [-Werror,-Wunused-variable]
>           pci_intr_handle_t        dummy;
>                                    ^
>   /sys/dev/pci/if_ix.c:1788:26: error: unused variable 'pa'
>         [-Werror,-Wunused-variable]
>           struct pci_attach_args  *pa = &os->os_pa;
> 
> So I'd like to know if that's the way to test if MSI-X can be used:
> 
>        /*
>         * Try a dummy map, maybe this bus doesn't like MSI, this function
>         * has no side effects.
>         */
>        if (pci_intr_map_msix(pa, 0, &dummy))
>                return (0);
> 
> I'd appreciate tests on many hardware as possible, especially for em(4).
> If you have been testing an earlier version of these diffs, please do
> it again :)
> 
> Thanks,
> Martin
> 
> Index: dev/pci/if_em.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.c,v
> retrieving revision 1.347
> diff -u -p -r1.347 if_em.c
> --- dev/pci/if_em.c   8 Mar 2020 11:43:43 -0000       1.347
> +++ dev/pci/if_em.c   17 Mar 2020 11:18:01 -0000
> @@ -233,6 +233,7 @@ void em_defer_attach(struct device*);
>  int  em_detach(struct device *, int);
>  int  em_activate(struct device *, int);
>  int  em_intr(void *);
> +int  em_allocate_legacy(struct em_softc *);
>  void em_start(struct ifqueue *);
>  int  em_ioctl(struct ifnet *, u_long, caddr_t);
>  void em_watchdog(struct ifnet *);
> @@ -290,6 +291,13 @@ void em_flush_tx_ring(struct em_queue *)
>  void em_flush_rx_ring(struct em_queue *);
>  void em_flush_desc_rings(struct em_softc *);
>  
> +/* MSIX/Multiqueue functions */
> +int  em_allocate_msix(struct em_softc *);
> +int  em_setup_queues_msix(struct em_softc *);
> +int  em_queue_intr_msix(void *);
> +int  em_link_intr_msix(void *);
> +void em_enable_queue_intr_msix(struct em_queue *);
> +
>  /*********************************************************************
>   *  OpenBSD Device Interface Entry Points
>   *********************************************************************/
> @@ -304,6 +312,7 @@ struct cfdriver em_cd = {
>  };
>  
>  static int em_smart_pwr_down = FALSE;
> +int em_enable_msix = 1;
>  
>  /*********************************************************************
>   *  Device identification routine
> @@ -919,6 +928,14 @@ em_init(void *arg)
>       }
>       em_initialize_receive_unit(sc);
>  
> +     if (sc->msix) {
> +             if (em_setup_queues_msix(sc)) {
> +                     printf("%s: Can't setup msix queues\n", DEVNAME(sc));
> +                     splx(s);
> +                     return;
> +             }
> +     }
> +
>       /* Program promiscuous mode and multicast filters. */
>       em_iff(sc);
>  
> @@ -1617,10 +1634,7 @@ int
>  em_allocate_pci_resources(struct em_softc *sc)
>  {
>       int             val, rid;
> -     pci_intr_handle_t       ih;
> -     const char              *intrstr = NULL;
>       struct pci_attach_args *pa = &sc->osdep.em_pa;
> -     pci_chipset_tag_t       pc = pa->pa_pc;
>       struct em_queue        *que = NULL;
>  
>       val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_MMBA);
> @@ -1691,6 +1705,9 @@ em_allocate_pci_resources(struct em_soft
>               }
>          }
>  
> +     sc->osdep.dev = (struct device *)sc;
> +     sc->hw.back = &sc->osdep;
> +
>       /* Only one queue for the moment. */
>       que = malloc(sizeof(struct em_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
>       if (que == NULL) {
> @@ -1703,29 +1720,10 @@ em_allocate_pci_resources(struct em_soft
>  
>       sc->queues = que;
>       sc->num_queues = 1;
> +     sc->msix = 0;
>       sc->legacy_irq = 0;
> -     if (pci_intr_map_msi(pa, &ih)) {
> -             if (pci_intr_map(pa, &ih)) {
> -                     printf(": couldn't map interrupt\n");
> -                     return (ENXIO);
> -             }
> -             sc->legacy_irq = 1;
> -     }
> -
> -     sc->osdep.dev = (struct device *)sc;
> -     sc->hw.back = &sc->osdep;
> -
> -     intrstr = pci_intr_string(pc, ih);
> -     sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> -         em_intr, sc, DEVNAME(sc));
> -     if (sc->sc_intrhand == NULL) {
> -             printf(": couldn't establish interrupt");
> -             if (intrstr != NULL)
> -                     printf(" at %s", intrstr);
> -             printf("\n");
> +     if (em_allocate_msix(sc) && em_allocate_legacy(sc))
>               return (ENXIO);
> -     }
> -     printf(": %s", intrstr);
>  
>       /*
>        * the ICP_xxxx device has multiple, duplicate register sets for
> @@ -1784,6 +1782,15 @@ em_free_pci_resources(struct em_softc *s
>               que->tx.sc_tx_desc_ring = NULL;
>               em_dma_free(sc, &que->tx.sc_tx_dma);
>       }
> +     if (que->tag)
> +             pci_intr_disestablish(pc, que->tag);
> +     que->tag = NULL;
> +     que->eims = 0;
> +     que->me = 0;
> +     que->sc = NULL;
> +     sc->legacy_irq = 0;
> +     sc->msix_linkvec = 0;
> +     sc->msix_queuesmask = 0;
>       if (sc->queues)
>               free(sc->queues, M_DEVBUF,
>                   sc->num_queues * sizeof(struct em_queue));
> @@ -1971,6 +1978,7 @@ em_setup_interface(struct em_softc *sc)
>  
>       if_attach(ifp);
>       ether_ifattach(ifp);
> +     em_enable_intr(sc);
>  }
>  
>  int
> @@ -3017,7 +3025,16 @@ em_enable_hw_vlans(struct em_softc *sc)
>  void
>  em_enable_intr(struct em_softc *sc)
>  {
> -     E1000_WRITE_REG(&sc->hw, IMS, (IMS_ENABLE_MASK));
> +     uint32_t mask;
> +
> +     if (sc->msix) {
> +             mask = sc->msix_queuesmask | sc->msix_linkmask;
> +             E1000_WRITE_REG(&sc->hw, EIAC, mask);
> +             E1000_WRITE_REG(&sc->hw, EIAM, mask);
> +             E1000_WRITE_REG(&sc->hw, EIMS, mask);
> +             E1000_WRITE_REG(&sc->hw, IMS, E1000_IMS_LSC);
> +     } else
> +             E1000_WRITE_REG(&sc->hw, IMS, (IMS_ENABLE_MASK));
>  }
>  
>  void
> @@ -3031,8 +3048,10 @@ em_disable_intr(struct em_softc *sc)
>        * For this to work correctly the Sequence error interrupt had
>        * to be enabled all the time.
>        */
> -
> -     if (sc->hw.mac_type == em_82542_rev2_0)
> +     if (sc->msix) {
> +             E1000_WRITE_REG(&sc->hw, EIMC, ~0);
> +             E1000_WRITE_REG(&sc->hw, EIAC, 0);
> +     } else if (sc->hw.mac_type == em_82542_rev2_0)
>               E1000_WRITE_REG(&sc->hw, IMC, (0xffffffff & ~E1000_IMC_RXSEQ));
>       else
>               E1000_WRITE_REG(&sc->hw, IMC, 0xffffffff);
> @@ -3506,6 +3525,264 @@ em_print_hw_stats(struct em_softc *sc)
>  }
>  #endif
>  #endif /* !SMALL_KERNEL */
> +
> +int
> +em_allocate_legacy(struct em_softc *sc)
> +{
> +     pci_intr_handle_t        ih;
> +     const char              *intrstr = NULL;
> +     struct pci_attach_args  *pa = &sc->osdep.em_pa;
> +     pci_chipset_tag_t        pc = pa->pa_pc;
> +
> +     if (pci_intr_map_msi(pa, &ih)) {
> +             if (pci_intr_map(pa, &ih)) {
> +                     printf(": couldn't map interrupt\n");
> +                     return (ENXIO);
> +             }
> +             sc->legacy_irq = 1;
> +     }
> +
> +     intrstr = pci_intr_string(pc, ih);
> +     sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> +         em_intr, sc, DEVNAME(sc));
> +     if (sc->sc_intrhand == NULL) {
> +             printf(": couldn't establish interrupt");
> +             if (intrstr != NULL)
> +                     printf(" at %s", intrstr);
> +             printf("\n");
> +             return (ENXIO);
> +     }
> +     printf(": %s", intrstr);
> +
> +     return (0);
> +}
> +
> +int
> +em_allocate_msix(struct em_softc *sc)
> +{
> +     pci_intr_handle_t        ih;
> +     const char              *intrstr = NULL;
> +     struct pci_attach_args  *pa = &sc->osdep.em_pa;
> +     pci_chipset_tag_t        pc = pa->pa_pc;
> +     struct em_queue         *que = sc->queues; /* Use only first queue. */
> +     int                      vec;
> +
> +     if (!em_enable_msix)
> +             return (ENODEV);
> +
> +     switch (sc->hw.mac_type) {
> +     case em_82576:
> +     case em_82580:
> +     case em_i350:
> +     case em_i210:
> +             break;
> +     default:
> +             return (ENODEV);
> +     }
> +
> +     vec = 0;
> +     if (pci_intr_map_msix(pa, vec, &ih))
> +             return (ENODEV);
> +     sc->msix = 1;
> +
> +     que->me = vec;
> +     que->eims = 1 << vec;
> +     snprintf(que->name, sizeof(que->name), "%s:%d", DEVNAME(sc), vec);
> +
> +     intrstr = pci_intr_string(pc, ih);
> +     que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> +         em_queue_intr_msix, que, que->name);
> +     if (que->tag == NULL) {
> +             printf(": couldn't establish interrupt");
> +             if (intrstr != NULL)
> +                     printf(" at %s", intrstr);
> +             printf("\n");
> +             return (ENXIO);
> +     }
> +
> +     /* Setup linkvector, use last queue vector + 1 */
> +     vec++;
> +     sc->msix_linkvec = vec;
> +     if (pci_intr_map_msix(pa, sc->msix_linkvec, &ih)) {
> +             printf(": couldn't map link vector\n");
> +             return (ENXIO);
> +     }
> +
> +     intrstr = pci_intr_string(pc, ih);
> +     sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> +         em_link_intr_msix, sc, DEVNAME(sc));
> +     if (sc->sc_intrhand == NULL) {
> +             printf(": couldn't establish interrupt");
> +             if (intrstr != NULL)
> +                     printf(" at %s", intrstr);
> +             printf("\n");
> +             return (ENXIO);
> +     }
> +     printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : "");
> +
> +     return (0);
> +}
> +
> +/*
> + * Interrupt for a specific queue, (not link interrupts). The EICR bit which
> + * maps to the EIMS bit expresses both RX and TX, therefore we can't
> + * distringuish if this is a RX completion of TX completion and must do both.
> + * The bits in EICR are autocleared and we _cannot_ read EICR.
> + */
> +int
> +em_queue_intr_msix(void *vque)
> +{
> +     struct em_queue *que = vque;
> +     struct em_softc *sc = que->sc;
> +     struct ifnet   *ifp = &sc->sc_ac.ac_if;
> +
> +     if (ifp->if_flags & IFF_RUNNING) {
> +             em_txeof(que);
> +             if (em_rxeof(que))
> +                     em_rxrefill(que);
> +     }
> +
> +     em_enable_queue_intr_msix(que);
> +
> +     return (1);
> +}
> +
> +int
> +em_link_intr_msix(void *arg)
> +{
> +     struct em_softc *sc = arg;
> +     uint32_t icr;
> +
> +     icr = E1000_READ_REG(&sc->hw, ICR);
> +
> +     /* Link status change */
> +     if (icr & E1000_ICR_LSC) {
> +             KERNEL_LOCK();
> +             sc->hw.get_link_status = 1;
> +             em_check_for_link(&sc->hw);
> +             em_update_link_status(sc);
> +             KERNEL_UNLOCK();
> +     }
> +
> +     /* Re-arm unconditionally */
> +     E1000_WRITE_REG(&sc->hw, IMS, E1000_ICR_LSC);
> +     E1000_WRITE_REG(&sc->hw, EIMS, sc->msix_linkmask);
> +
> +     return (1);
> +}
> +
> +/*
> + * Maps queues into msix interrupt vectors.
> + */
> +int
> +em_setup_queues_msix(struct em_softc *sc)
> +{
> +     struct em_queue *que = sc->queues; /* Use only first queue. */
> +     uint32_t ivar, newitr, index;
> +
> +     KASSERT(sc->msix);
> +
> +     /* First turn on RSS capability */
> +     if (sc->hw.mac_type != em_82575)
> +             E1000_WRITE_REG(&sc->hw, GPIE,
> +                 E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
> +                 E1000_GPIE_PBA | E1000_GPIE_NSICR);
> +
> +     /* Turn on MSIX */
> +     switch (sc->hw.mac_type) {
> +     case em_82580:
> +     case em_i350:
> +     case em_i210:
> +             /* RX entries */
> +             /*
> +              * Note, this maps Queues into MSIX vectors, it works fine.
> +              * The funky calculation of offsets and checking if que->me is
> +              * odd is due to the weird register distribution, the datasheet
> +              * explains it well.
> +              */
> +             index = que->me >> 1;
> +             ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
> +             if (que->me & 1) {
> +                     ivar &= 0xFF00FFFF;
> +                     ivar |= (que->me | E1000_IVAR_VALID) << 16;
> +             } else {
> +                     ivar &= 0xFFFFFF00;
> +                     ivar |= que->me | E1000_IVAR_VALID;
> +             }
> +             E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
> +
> +             /* TX entries */
> +             index = que->me >> 1;
> +             ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
> +             if (que->me & 1) {
> +                     ivar &= 0x00FFFFFF;
> +                     ivar |= (que->me | E1000_IVAR_VALID) << 24;
> +             } else {
> +                     ivar &= 0xFFFF00FF;
> +                     ivar |= (que->me | E1000_IVAR_VALID) << 8;
> +             }
> +             E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
> +             sc->msix_queuesmask |= que->eims;
> +
> +             /* And for the link interrupt */
> +             ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
> +             sc->msix_linkmask = 1 << sc->msix_linkvec;
> +             E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
> +             break;
> +     case em_82576:
> +             /* RX entries */
> +             index = que->me & 0x7; /* Each IVAR has two entries */
> +             ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
> +             if (que->me < 8) {
> +                     ivar &= 0xFFFFFF00;
> +                     ivar |= que->me | E1000_IVAR_VALID;
> +             } else {
> +                     ivar &= 0xFF00FFFF;
> +                     ivar |= (que->me | E1000_IVAR_VALID) << 16;
> +             }
> +             E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
> +             sc->msix_queuesmask |= que->eims;
> +             /* TX entries */
> +             index = que->me & 0x7; /* Each IVAR has two entries */
> +             ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
> +             if (que->me < 8) {
> +                     ivar &= 0xFFFF00FF;
> +                     ivar |= (que->me | E1000_IVAR_VALID) << 8;
> +             } else {
> +                     ivar &= 0x00FFFFFF;
> +                     ivar |= (que->me | E1000_IVAR_VALID) << 24;
> +             }
> +             E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
> +             sc->msix_queuesmask |= que->eims;
> +
> +             /* And for the link interrupt */
> +             ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
> +             sc->msix_linkmask = 1 << sc->msix_linkvec;
> +             E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
> +             break;
> +     default:
> +             panic("unsupported mac");
> +             break;
> +     }
> +
> +     /* Set the starting interrupt rate */
> +     newitr = (4000000 / MAX_INTS_PER_SEC) & 0x7FFC;
> +
> +     if (sc->hw.mac_type == em_82575)
> +             newitr |= newitr << 16;
> +     else
> +             newitr |= E1000_EITR_CNT_IGNR;
> +
> +     E1000_WRITE_REG(&sc->hw, EITR(que->me), newitr);
> +
> +     return (0);
> +}
> +
> +void
> +em_enable_queue_intr_msix(struct em_queue *que)
> +{
> +     E1000_WRITE_REG(&que->sc->hw, EIMS, que->eims);
> +}
>  
>  int
>  em_allocate_desc_rings(struct em_softc *sc)
> Index: dev/pci/if_em.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.h,v
> retrieving revision 1.75
> diff -u -p -r1.75 if_em.h
> --- dev/pci/if_em.h   20 Feb 2020 09:32:49 -0000      1.75
> +++ dev/pci/if_em.h   9 Mar 2020 10:48:15 -0000
> @@ -358,7 +358,9 @@ struct em_softc;
>  struct em_queue {
>       struct em_softc         *sc;
>       uint32_t                 me;    /* queue index, also msix vector */
> -
> +     uint32_t                 eims;  /* msix only */
> +     void                    *tag;   /* NULL in legacy, check sc_intrhand */
> +     char                     name[8];
>       struct em_tx             tx;
>       struct em_rx             rx;
>  
> @@ -433,6 +435,10 @@ struct em_softc {
>       boolean_t       pcix_82544;
>       struct em_hw_stats stats;
>  
> +     int                      msix;
> +     uint32_t                 msix_linkvec;
> +     uint32_t                 msix_linkmask;
> +     uint32_t                 msix_queuesmask;
>       int                      num_queues;
>       struct em_queue         *queues;
>  };
> Index: dev/pci/if_em_hw.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v
> retrieving revision 1.81
> diff -u -p -r1.81 if_em_hw.h
> --- dev/pci/if_em_hw.h        8 Mar 2020 11:43:43 -0000       1.81
> +++ dev/pci/if_em_hw.h        17 Mar 2020 10:58:49 -0000
> @@ -2033,6 +2033,11 @@ struct em_hw {
>  #define E1000_RXDCTL_THRESH_UNIT_DESC 0x1000000
>  #define E1000_RXDCTL_QUEUE_ENABLE 0x2000000
>  
> +#define E1000_EITR_ITR_INT_MASK      0x0000FFFF
> +/* E1000_EITR_CNT_IGNR is only for 82576 and newer */
> +#define E1000_EITR_CNT_IGNR  0x80000000 /* Don't reset counters on write */
> +#define E1000_EITR_INTERVAL  0x00007FFC
> +
>  /* Transmit Descriptor Control */
>  #define E1000_TXDCTL_PTHRESH 0x000000FF /* TXDCTL Prefetch Threshold */
>  #define E1000_TXDCTL_HTHRESH 0x0000FF00 /* TXDCTL Host Threshold */
> Index: dev/pci/if_ix.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
> retrieving revision 1.161
> diff -u -p -r1.161 if_ix.c
> --- dev/pci/if_ix.c   2 Mar 2020 01:59:01 -0000       1.161
> +++ dev/pci/if_ix.c   17 Mar 2020 11:41:58 -0000
> @@ -114,6 +114,8 @@ int       ixgbe_media_change(struct ifnet *);
>  void ixgbe_identify_hardware(struct ix_softc *);
>  int  ixgbe_allocate_pci_resources(struct ix_softc *);
>  int  ixgbe_allocate_legacy(struct ix_softc *);
> +int  ixgbe_allocate_msix(struct ix_softc *);
> +int  ixgbe_setup_msix(struct ix_softc *);
>  int  ixgbe_allocate_queues(struct ix_softc *);
>  void ixgbe_free_pci_resources(struct ix_softc *);
>  void ixgbe_local_timer(void *);
> @@ -140,6 +142,7 @@ void      ixgbe_initialize_rss_mapping(struct
>  int  ixgbe_rxfill(struct rx_ring *);
>  void ixgbe_rxrefill(void *);
>  
> +int  ixgbe_intr(struct ix_softc *sc);
>  void ixgbe_enable_intr(struct ix_softc *);
>  void ixgbe_disable_intr(struct ix_softc *);
>  void ixgbe_update_stats_counters(struct ix_softc *);
> @@ -172,11 +175,16 @@ void    ixgbe_handle_msf(struct ix_softc *)
>  void ixgbe_handle_phy(struct ix_softc *);
>  
>  /* Legacy (single vector interrupt handler */
> -int  ixgbe_intr(void *);
> +int  ixgbe_legacy_intr(void *);
>  void ixgbe_enable_queue(struct ix_softc *, uint32_t);
> +void ixgbe_enable_queues(struct ix_softc *);
>  void ixgbe_disable_queue(struct ix_softc *, uint32_t);
>  void ixgbe_rearm_queue(struct ix_softc *, uint32_t);
>  
> +/* MSI-X (multiple vectors interrupt handlers)  */
> +int  ixgbe_link_intr(void *);
> +int  ixgbe_queue_intr(void *);
> +
>  /*********************************************************************
>   *  OpenBSD Device Interface Entry Points
>   *********************************************************************/
> @@ -190,6 +198,7 @@ struct cfattach ix_ca = {
>  };
>  
>  int ixgbe_smart_speed = ixgbe_smart_speed_on;
> +int ixgbe_enable_msix = 1;
>  
>  /*********************************************************************
>   *  Device identification routine
> @@ -292,7 +301,10 @@ ixgbe_attach(struct device *parent, stru
>       bcopy(sc->hw.mac.addr, sc->arpcom.ac_enaddr,
>           IXGBE_ETH_LENGTH_OF_ADDRESS);
>  
> -     error = ixgbe_allocate_legacy(sc);
> +     if (sc->msix > 1)
> +             error = ixgbe_allocate_msix(sc);
> +     else
> +             error = ixgbe_allocate_legacy(sc);
>       if (error)
>               goto err_late;
>  
> @@ -524,6 +536,7 @@ ixgbe_ioctl(struct ifnet * ifp, u_long c
>                       ixgbe_disable_intr(sc);
>                       ixgbe_iff(sc);
>                       ixgbe_enable_intr(sc);
> +                     ixgbe_enable_queues(sc);
>               }
>               error = 0;
>       }
> @@ -819,6 +832,9 @@ ixgbe_init(void *arg)
>               itr |= IXGBE_EITR_LLI_MOD | IXGBE_EITR_CNT_WDIS;
>       IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(0), itr);
>  
> +     /* Set moderation on the Link interrupt */
> +     IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(sc->linkvec), IXGBE_LINK_ITR);
> +
>       /* Enable power to the phy */
>       if (sc->hw.phy.ops.set_phy_power)
>               sc->hw.phy.ops.set_phy_power(&sc->hw, TRUE);
> @@ -834,6 +850,7 @@ ixgbe_init(void *arg)
>  
>       /* And now turn on interrupts */
>       ixgbe_enable_intr(sc);
> +     ixgbe_enable_queues(sc);
>  
>       /* Now inform the stack we're ready */
>       ifp->if_flags |= IFF_RUNNING;
> @@ -964,6 +981,16 @@ ixgbe_enable_queue(struct ix_softc *sc, 
>  }
>  
>  void
> +ixgbe_enable_queues(struct ix_softc *sc)
> +{
> +     struct ix_queue *que;
> +     int i;
> +
> +     for (i = 0, que = sc->queues; i < sc->num_queues; i++, que++)
> +             ixgbe_enable_queue(sc, que->msix);
> +}
> +
> +void
>  ixgbe_disable_queue(struct ix_softc *sc, uint32_t vector)
>  {
>       uint64_t queue = 1ULL << vector;
> @@ -982,6 +1009,41 @@ ixgbe_disable_queue(struct ix_softc *sc,
>       }
>  }
>  
> +/*
> + * MSIX Interrupt Handlers
> + */
> +int
> +ixgbe_link_intr(void *vsc)
> +{
> +     struct ix_softc *sc = (struct ix_softc *)vsc;
> +
> +     return ixgbe_intr(sc);
> +}
> +
> +int
> +ixgbe_queue_intr(void *vque)
> +{
> +     struct ix_queue *que = vque;
> +     struct ix_softc *sc = que->sc;
> +     struct ifnet    *ifp = &sc->arpcom.ac_if;
> +     struct tx_ring  *txr = sc->tx_rings;
> +
> +     if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> +             ixgbe_rxeof(que);
> +             ixgbe_txeof(txr);
> +             if (ixgbe_rxfill(que->rxr)) {
> +                     /* Advance the Rx Queue "Tail Pointer" */
> +                     IXGBE_WRITE_REG(&sc->hw, IXGBE_RDT(que->rxr->me),
> +                         que->rxr->last_desc_filled);
> +             } else
> +                     timeout_add(&sc->rx_refill, 1);
> +     }
> +
> +     ixgbe_enable_queue(sc, que->msix);
> +
> +     return (1);
> +}
> +
>  /*********************************************************************
>   *
>   *  Legacy Interrupt Service routine
> @@ -989,29 +1051,24 @@ ixgbe_disable_queue(struct ix_softc *sc,
>   **********************************************************************/
>  
>  int
> -ixgbe_intr(void *arg)
> +ixgbe_legacy_intr(void *arg)
>  {
>       struct ix_softc *sc = (struct ix_softc *)arg;
> -     struct ix_queue *que = sc->queues;
>       struct ifnet    *ifp = &sc->arpcom.ac_if;
>       struct tx_ring  *txr = sc->tx_rings;
> -     struct ixgbe_hw *hw = &sc->hw;
> -     uint32_t         reg_eicr, mod_mask, msf_mask;
> -     int              i, refill = 0;
> +     int rv;
>  
> -     reg_eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
> -     if (reg_eicr == 0) {
> -             ixgbe_enable_intr(sc);
> +     rv = ixgbe_intr(sc);
> +     if (rv == 0) {
> +             ixgbe_enable_queues(sc);
>               return (0);
>       }
>  
>       if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> +             struct ix_queue *que = sc->queues;
> +
>               ixgbe_rxeof(que);
>               ixgbe_txeof(txr);
> -             refill = 1;
> -     }
> -
> -     if (refill) {
>               if (ixgbe_rxfill(que->rxr)) {
>                       /* Advance the Rx Queue "Tail Pointer" */
>                       IXGBE_WRITE_REG(&sc->hw, IXGBE_RDT(que->rxr->me),
> @@ -1020,6 +1077,22 @@ ixgbe_intr(void *arg)
>                       timeout_add(&sc->rx_refill, 1);
>       }
>  
> +     return (rv);
> +}
> +
> +int
> +ixgbe_intr(struct ix_softc *sc)
> +{
> +     struct ifnet    *ifp = &sc->arpcom.ac_if;
> +     struct ixgbe_hw *hw = &sc->hw;
> +     uint32_t         reg_eicr, mod_mask, msf_mask;
> +
> +     reg_eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
> +     if (reg_eicr == 0) {
> +             ixgbe_enable_intr(sc);
> +             return (0);
> +     }
> +
>       /* Link status change */
>       if (reg_eicr & IXGBE_EICR_LSC) {
>               KERNEL_LOCK();
> @@ -1090,9 +1163,6 @@ ixgbe_intr(void *arg)
>               KERNEL_UNLOCK();
>       }
>  
> -     for (i = 0; i < sc->num_queues; i++, que++)
> -             ixgbe_enable_queue(sc, que->msix);
> -
>       return (1);
>  }
>  
> @@ -1626,7 +1696,7 @@ ixgbe_allocate_legacy(struct ix_softc *s
>  
>       intrstr = pci_intr_string(pc, ih);
>       sc->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> -         ixgbe_intr, sc, sc->dev.dv_xname);
> +         ixgbe_legacy_intr, sc, sc->dev.dv_xname);
>       if (sc->tag == NULL) {
>               printf(": couldn't establish interrupt");
>               if (intrstr != NULL)
> @@ -1642,6 +1712,92 @@ ixgbe_allocate_legacy(struct ix_softc *s
>       return (0);
>  }
>  
> +/*********************************************************************
> + *
> + *  Setup the MSI-X Interrupt handlers
> + *
> + **********************************************************************/
> +int
> +ixgbe_allocate_msix(struct ix_softc *sc)
> +{
> +     struct ixgbe_osdep      *os = &sc->osdep;
> +     struct pci_attach_args  *pa  = &os->os_pa;
> +     int                      vec, error = 0;
> +     struct ix_queue         *que = sc->queues;
> +     const char              *intrstr = NULL;
> +     pci_chipset_tag_t       pc = pa->pa_pc;
> +     pci_intr_handle_t       ih;
> +
> +     vec = 0;
> +     if (pci_intr_map_msix(pa, vec, &ih)) {
> +             printf(": couldn't map interrupt\n");
> +             return (ENXIO);
> +     }
> +
> +     que->msix = vec;
> +     snprintf(que->name, sizeof(que->name), "%s:%d", sc->dev.dv_xname, vec);
> +
> +     intrstr = pci_intr_string(pc, ih);
> +     que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> +         ixgbe_queue_intr, que, que->name);
> +     if (que->tag == NULL) {
> +             printf(": couldn't establish interrupt");
> +             if (intrstr != NULL)
> +                     printf(" at %s", intrstr);
> +             printf("\n");
> +             return (ENXIO);
> +     }
> +
> +     /* Now the link status/control last MSI-X vector */
> +     vec++;
> +     if (pci_intr_map_msix(pa, vec, &ih)) {
> +             printf(": couldn't map link vector\n");
> +             error = ENXIO;
> +             goto fail;
> +     }
> +
> +     intrstr = pci_intr_string(pc, ih);
> +     sc->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> +         ixgbe_link_intr, sc, sc->dev.dv_xname);
> +     if (sc->tag == NULL) {
> +             printf(": couldn't establish interrupt");
> +             if (intrstr != NULL)
> +                     printf(" at %s", intrstr);
> +             printf("\n");
> +             error = ENXIO;
> +             goto fail;
> +     }
> +
> +     sc->linkvec = vec;
> +     printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : "");
> +
> +     return (0);
> +fail:
> +     pci_intr_disestablish(pc, que->tag);
> +     que->tag = NULL;
> +     return (error);
> +}
> +
> +int
> +ixgbe_setup_msix(struct ix_softc *sc)
> +{
> +     struct ixgbe_osdep      *os = &sc->osdep;
> +     struct pci_attach_args  *pa = &os->os_pa;
> +     pci_intr_handle_t        dummy;
> +
> +     if (!ixgbe_enable_msix)
> +             return (0);
> +
> +     /*
> +      * Try a dummy map, maybe this bus doesn't like MSI, this function
> +      * has no side effects.
> +      */
> +     if (pci_intr_map_msix(pa, 0, &dummy))
> +             return (0);
> +
> +     return (2); /* queue vector + link vector */
> +}
> +
>  int
>  ixgbe_allocate_pci_resources(struct ix_softc *sc)
>  {
> @@ -1666,10 +1822,8 @@ ixgbe_allocate_pci_resources(struct ix_s
>       sc->num_queues = 1;
>       sc->hw.back = os;
>  
> -#ifdef notyet
>       /* Now setup MSI or MSI/X, return us the number of supported vectors. */
>       sc->msix = ixgbe_setup_msix(sc);
> -#endif
>  
>       return (0);
>  }
> @@ -3085,9 +3239,7 @@ void
>  ixgbe_enable_intr(struct ix_softc *sc)
>  {
>       struct ixgbe_hw *hw = &sc->hw;
> -     struct ix_queue *que = sc->queues;
>       uint32_t        mask, fwsm;
> -     int i;
>  
>       mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
>       /* Enable Fan Failure detection */
> @@ -3135,14 +3287,6 @@ ixgbe_enable_intr(struct ix_softc *sc)
>               IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
>       }
>  
> -     /*
> -      * Now enable all queues, this is done separately to
> -      * allow for handling the extended (beyond 32) MSIX
> -      * vectors that can be used by 82599
> -      */
> -     for (i = 0; i < sc->num_queues; i++, que++)
> -             ixgbe_enable_queue(sc, que->msix);
> -
>       IXGBE_WRITE_FLUSH(hw);
>  }
>  
> @@ -3258,15 +3402,11 @@ ixgbe_set_ivar(struct ix_softc *sc, uint
>  void
>  ixgbe_configure_ivars(struct ix_softc *sc)
>  {
> -#if notyet
>       struct ix_queue *que = sc->queues;
>       uint32_t newitr;
>       int i;
>  
> -     if (ixgbe_max_interrupt_rate > 0)
> -             newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
> -     else
> -             newitr = 0;
> +     newitr = (4000000 / IXGBE_INTS_PER_SEC) & 0x0FF8;
>  
>       for (i = 0; i < sc->num_queues; i++, que++) {
>               /* First the RX queue entry */
> @@ -3280,7 +3420,6 @@ ixgbe_configure_ivars(struct ix_softc *s
>  
>       /* For the Link interrupt */
>       ixgbe_set_ivar(sc, 1, sc->linkvec, -1);
> -#endif
>  }
>  
>  /*
> Index: dev/pci/if_ix.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_ix.h,v
> retrieving revision 1.37
> diff -u -p -r1.37 if_ix.h
> --- dev/pci/if_ix.h   2 Mar 2020 01:59:01 -0000       1.37
> +++ dev/pci/if_ix.h   2 Mar 2020 09:28:59 -0000
> @@ -120,6 +120,7 @@
>   * Interrupt Moderation parameters
>   */
>  #define IXGBE_INTS_PER_SEC           8000
> +#define IXGBE_LINK_ITR                       1000
>  
>  struct ixgbe_tx_buf {
>       uint32_t                eop_index;
> @@ -154,6 +155,8 @@ struct ix_queue {
>       uint32_t                msix;           /* This queue's MSIX vector */
>       uint32_t                eims;           /* This queue's EIMS bit */
>       uint32_t                eitr_setting;
> +     char                    name[8];
> +     pci_intr_handle_t       ih;
>       void                    *tag;
>       struct tx_ring          *txr;
>       struct rx_ring          *rxr;
> 

Reply via email to