On 17.3.2020. 13:16, Martin Pieuchot wrote:
> Diff below allows ix(4) and em(4) to establish two MSI-X handlers.
> Multiqueue support is intentionally not included in this diff.
>
Hi all,
if someone is willing to make em multiqueue i have
em0 at pci6 dev 0 function 0 "Intel 82576" rev 0x01, msix, 1 queue
em2 at pci7 dev 0 function 0 "Intel I210" rev 0x03, msix, 1 queue
em4 at pci11 dev 0 function 0 "Intel I350" rev 0x01, msix, 1 queue
for testing and i will look for 82574 and 82575
Thank you ....
> One handler is for the single queue (index ":0") and one for the
> link interrupt:
>
> # vmstat -i |grep ix0
> irq114/ix0:0 73178178 3758
> irq115/ix0 2 0
>
> A per-driver toggle allows to switch MSI-X support on/off. My plan is
> to commit with the switch "off" for the moment. Switching it to "on"
> should be better done in the beginning of a release cycle. However it
> is "on" in the diff below for testing purposes.
>
> The em(4) diff doesn't include support for 82575 nor 82574. If somebody
> has such hardware and is interested I can lift the necessary bits from
> FreeBSD. That said em_setup_queues_msix() contains references to 82575
> to help diffing with FreeBSD.
>
> The ix(4) diff cannot be committed right now as it breaks the build on
> architectures that define pci_intr_map_msix() to (-1). The compiler
> complains that the arguments of the macro are unused.
>
> /sys/dev/pci/if_ix.c:1789:21: error: unused variable 'dummy'
> [-Werror,-Wunused-variable]
> pci_intr_handle_t dummy;
> ^
> /sys/dev/pci/if_ix.c:1788:26: error: unused variable 'pa'
> [-Werror,-Wunused-variable]
> struct pci_attach_args *pa = &os->os_pa;
>
> So I'd like to know if that's the way to test if MSI-X can be used:
>
> /*
> * Try a dummy map, maybe this bus doesn't like MSI, this function
> * has no side effects.
> */
> if (pci_intr_map_msix(pa, 0, &dummy))
> return (0);
>
> I'd appreciate tests on many hardware as possible, especially for em(4).
> If you have been testing an earlier version of these diffs, please do
> it again :)
>
> Thanks,
> Martin
>
> Index: dev/pci/if_em.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.c,v
> retrieving revision 1.347
> diff -u -p -r1.347 if_em.c
> --- dev/pci/if_em.c 8 Mar 2020 11:43:43 -0000 1.347
> +++ dev/pci/if_em.c 17 Mar 2020 11:18:01 -0000
> @@ -233,6 +233,7 @@ void em_defer_attach(struct device*);
> int em_detach(struct device *, int);
> int em_activate(struct device *, int);
> int em_intr(void *);
> +int em_allocate_legacy(struct em_softc *);
> void em_start(struct ifqueue *);
> int em_ioctl(struct ifnet *, u_long, caddr_t);
> void em_watchdog(struct ifnet *);
> @@ -290,6 +291,13 @@ void em_flush_tx_ring(struct em_queue *)
> void em_flush_rx_ring(struct em_queue *);
> void em_flush_desc_rings(struct em_softc *);
>
> +/* MSIX/Multiqueue functions */
> +int em_allocate_msix(struct em_softc *);
> +int em_setup_queues_msix(struct em_softc *);
> +int em_queue_intr_msix(void *);
> +int em_link_intr_msix(void *);
> +void em_enable_queue_intr_msix(struct em_queue *);
> +
> /*********************************************************************
> * OpenBSD Device Interface Entry Points
> *********************************************************************/
> @@ -304,6 +312,7 @@ struct cfdriver em_cd = {
> };
>
> static int em_smart_pwr_down = FALSE;
> +int em_enable_msix = 1;
>
> /*********************************************************************
> * Device identification routine
> @@ -919,6 +928,14 @@ em_init(void *arg)
> }
> em_initialize_receive_unit(sc);
>
> + if (sc->msix) {
> + if (em_setup_queues_msix(sc)) {
> + printf("%s: Can't setup msix queues\n", DEVNAME(sc));
> + splx(s);
> + return;
> + }
> + }
> +
> /* Program promiscuous mode and multicast filters. */
> em_iff(sc);
>
> @@ -1617,10 +1634,7 @@ int
> em_allocate_pci_resources(struct em_softc *sc)
> {
> int val, rid;
> - pci_intr_handle_t ih;
> - const char *intrstr = NULL;
> struct pci_attach_args *pa = &sc->osdep.em_pa;
> - pci_chipset_tag_t pc = pa->pa_pc;
> struct em_queue *que = NULL;
>
> val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_MMBA);
> @@ -1691,6 +1705,9 @@ em_allocate_pci_resources(struct em_soft
> }
> }
>
> + sc->osdep.dev = (struct device *)sc;
> + sc->hw.back = &sc->osdep;
> +
> /* Only one queue for the moment. */
> que = malloc(sizeof(struct em_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
> if (que == NULL) {
> @@ -1703,29 +1720,10 @@ em_allocate_pci_resources(struct em_soft
>
> sc->queues = que;
> sc->num_queues = 1;
> + sc->msix = 0;
> sc->legacy_irq = 0;
> - if (pci_intr_map_msi(pa, &ih)) {
> - if (pci_intr_map(pa, &ih)) {
> - printf(": couldn't map interrupt\n");
> - return (ENXIO);
> - }
> - sc->legacy_irq = 1;
> - }
> -
> - sc->osdep.dev = (struct device *)sc;
> - sc->hw.back = &sc->osdep;
> -
> - intrstr = pci_intr_string(pc, ih);
> - sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> - em_intr, sc, DEVNAME(sc));
> - if (sc->sc_intrhand == NULL) {
> - printf(": couldn't establish interrupt");
> - if (intrstr != NULL)
> - printf(" at %s", intrstr);
> - printf("\n");
> + if (em_allocate_msix(sc) && em_allocate_legacy(sc))
> return (ENXIO);
> - }
> - printf(": %s", intrstr);
>
> /*
> * the ICP_xxxx device has multiple, duplicate register sets for
> @@ -1784,6 +1782,15 @@ em_free_pci_resources(struct em_softc *s
> que->tx.sc_tx_desc_ring = NULL;
> em_dma_free(sc, &que->tx.sc_tx_dma);
> }
> + if (que->tag)
> + pci_intr_disestablish(pc, que->tag);
> + que->tag = NULL;
> + que->eims = 0;
> + que->me = 0;
> + que->sc = NULL;
> + sc->legacy_irq = 0;
> + sc->msix_linkvec = 0;
> + sc->msix_queuesmask = 0;
> if (sc->queues)
> free(sc->queues, M_DEVBUF,
> sc->num_queues * sizeof(struct em_queue));
> @@ -1971,6 +1978,7 @@ em_setup_interface(struct em_softc *sc)
>
> if_attach(ifp);
> ether_ifattach(ifp);
> + em_enable_intr(sc);
> }
>
> int
> @@ -3017,7 +3025,16 @@ em_enable_hw_vlans(struct em_softc *sc)
> void
> em_enable_intr(struct em_softc *sc)
> {
> - E1000_WRITE_REG(&sc->hw, IMS, (IMS_ENABLE_MASK));
> + uint32_t mask;
> +
> + if (sc->msix) {
> + mask = sc->msix_queuesmask | sc->msix_linkmask;
> + E1000_WRITE_REG(&sc->hw, EIAC, mask);
> + E1000_WRITE_REG(&sc->hw, EIAM, mask);
> + E1000_WRITE_REG(&sc->hw, EIMS, mask);
> + E1000_WRITE_REG(&sc->hw, IMS, E1000_IMS_LSC);
> + } else
> + E1000_WRITE_REG(&sc->hw, IMS, (IMS_ENABLE_MASK));
> }
>
> void
> @@ -3031,8 +3048,10 @@ em_disable_intr(struct em_softc *sc)
> * For this to work correctly the Sequence error interrupt had
> * to be enabled all the time.
> */
> -
> - if (sc->hw.mac_type == em_82542_rev2_0)
> + if (sc->msix) {
> + E1000_WRITE_REG(&sc->hw, EIMC, ~0);
> + E1000_WRITE_REG(&sc->hw, EIAC, 0);
> + } else if (sc->hw.mac_type == em_82542_rev2_0)
> E1000_WRITE_REG(&sc->hw, IMC, (0xffffffff & ~E1000_IMC_RXSEQ));
> else
> E1000_WRITE_REG(&sc->hw, IMC, 0xffffffff);
> @@ -3506,6 +3525,264 @@ em_print_hw_stats(struct em_softc *sc)
> }
> #endif
> #endif /* !SMALL_KERNEL */
> +
> +int
> +em_allocate_legacy(struct em_softc *sc)
> +{
> + pci_intr_handle_t ih;
> + const char *intrstr = NULL;
> + struct pci_attach_args *pa = &sc->osdep.em_pa;
> + pci_chipset_tag_t pc = pa->pa_pc;
> +
> + if (pci_intr_map_msi(pa, &ih)) {
> + if (pci_intr_map(pa, &ih)) {
> + printf(": couldn't map interrupt\n");
> + return (ENXIO);
> + }
> + sc->legacy_irq = 1;
> + }
> +
> + intrstr = pci_intr_string(pc, ih);
> + sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> + em_intr, sc, DEVNAME(sc));
> + if (sc->sc_intrhand == NULL) {
> + printf(": couldn't establish interrupt");
> + if (intrstr != NULL)
> + printf(" at %s", intrstr);
> + printf("\n");
> + return (ENXIO);
> + }
> + printf(": %s", intrstr);
> +
> + return (0);
> +}
> +
> +int
> +em_allocate_msix(struct em_softc *sc)
> +{
> + pci_intr_handle_t ih;
> + const char *intrstr = NULL;
> + struct pci_attach_args *pa = &sc->osdep.em_pa;
> + pci_chipset_tag_t pc = pa->pa_pc;
> + struct em_queue *que = sc->queues; /* Use only first queue. */
> + int vec;
> +
> + if (!em_enable_msix)
> + return (ENODEV);
> +
> + switch (sc->hw.mac_type) {
> + case em_82576:
> + case em_82580:
> + case em_i350:
> + case em_i210:
> + break;
> + default:
> + return (ENODEV);
> + }
> +
> + vec = 0;
> + if (pci_intr_map_msix(pa, vec, &ih))
> + return (ENODEV);
> + sc->msix = 1;
> +
> + que->me = vec;
> + que->eims = 1 << vec;
> + snprintf(que->name, sizeof(que->name), "%s:%d", DEVNAME(sc), vec);
> +
> + intrstr = pci_intr_string(pc, ih);
> + que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> + em_queue_intr_msix, que, que->name);
> + if (que->tag == NULL) {
> + printf(": couldn't establish interrupt");
> + if (intrstr != NULL)
> + printf(" at %s", intrstr);
> + printf("\n");
> + return (ENXIO);
> + }
> +
> + /* Setup linkvector, use last queue vector + 1 */
> + vec++;
> + sc->msix_linkvec = vec;
> + if (pci_intr_map_msix(pa, sc->msix_linkvec, &ih)) {
> + printf(": couldn't map link vector\n");
> + return (ENXIO);
> + }
> +
> + intrstr = pci_intr_string(pc, ih);
> + sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> + em_link_intr_msix, sc, DEVNAME(sc));
> + if (sc->sc_intrhand == NULL) {
> + printf(": couldn't establish interrupt");
> + if (intrstr != NULL)
> + printf(" at %s", intrstr);
> + printf("\n");
> + return (ENXIO);
> + }
> + printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : "");
> +
> + return (0);
> +}
> +
> +/*
> + * Interrupt for a specific queue, (not link interrupts). The EICR bit which
> + * maps to the EIMS bit expresses both RX and TX, therefore we can't
> + * distringuish if this is a RX completion of TX completion and must do both.
> + * The bits in EICR are autocleared and we _cannot_ read EICR.
> + */
> +int
> +em_queue_intr_msix(void *vque)
> +{
> + struct em_queue *que = vque;
> + struct em_softc *sc = que->sc;
> + struct ifnet *ifp = &sc->sc_ac.ac_if;
> +
> + if (ifp->if_flags & IFF_RUNNING) {
> + em_txeof(que);
> + if (em_rxeof(que))
> + em_rxrefill(que);
> + }
> +
> + em_enable_queue_intr_msix(que);
> +
> + return (1);
> +}
> +
> +int
> +em_link_intr_msix(void *arg)
> +{
> + struct em_softc *sc = arg;
> + uint32_t icr;
> +
> + icr = E1000_READ_REG(&sc->hw, ICR);
> +
> + /* Link status change */
> + if (icr & E1000_ICR_LSC) {
> + KERNEL_LOCK();
> + sc->hw.get_link_status = 1;
> + em_check_for_link(&sc->hw);
> + em_update_link_status(sc);
> + KERNEL_UNLOCK();
> + }
> +
> + /* Re-arm unconditionally */
> + E1000_WRITE_REG(&sc->hw, IMS, E1000_ICR_LSC);
> + E1000_WRITE_REG(&sc->hw, EIMS, sc->msix_linkmask);
> +
> + return (1);
> +}
> +
> +/*
> + * Maps queues into msix interrupt vectors.
> + */
> +int
> +em_setup_queues_msix(struct em_softc *sc)
> +{
> + struct em_queue *que = sc->queues; /* Use only first queue. */
> + uint32_t ivar, newitr, index;
> +
> + KASSERT(sc->msix);
> +
> + /* First turn on RSS capability */
> + if (sc->hw.mac_type != em_82575)
> + E1000_WRITE_REG(&sc->hw, GPIE,
> + E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
> + E1000_GPIE_PBA | E1000_GPIE_NSICR);
> +
> + /* Turn on MSIX */
> + switch (sc->hw.mac_type) {
> + case em_82580:
> + case em_i350:
> + case em_i210:
> + /* RX entries */
> + /*
> + * Note, this maps Queues into MSIX vectors, it works fine.
> + * The funky calculation of offsets and checking if que->me is
> + * odd is due to the weird register distribution, the datasheet
> + * explains it well.
> + */
> + index = que->me >> 1;
> + ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
> + if (que->me & 1) {
> + ivar &= 0xFF00FFFF;
> + ivar |= (que->me | E1000_IVAR_VALID) << 16;
> + } else {
> + ivar &= 0xFFFFFF00;
> + ivar |= que->me | E1000_IVAR_VALID;
> + }
> + E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
> +
> + /* TX entries */
> + index = que->me >> 1;
> + ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
> + if (que->me & 1) {
> + ivar &= 0x00FFFFFF;
> + ivar |= (que->me | E1000_IVAR_VALID) << 24;
> + } else {
> + ivar &= 0xFFFF00FF;
> + ivar |= (que->me | E1000_IVAR_VALID) << 8;
> + }
> + E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
> + sc->msix_queuesmask |= que->eims;
> +
> + /* And for the link interrupt */
> + ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
> + sc->msix_linkmask = 1 << sc->msix_linkvec;
> + E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
> + break;
> + case em_82576:
> + /* RX entries */
> + index = que->me & 0x7; /* Each IVAR has two entries */
> + ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
> + if (que->me < 8) {
> + ivar &= 0xFFFFFF00;
> + ivar |= que->me | E1000_IVAR_VALID;
> + } else {
> + ivar &= 0xFF00FFFF;
> + ivar |= (que->me | E1000_IVAR_VALID) << 16;
> + }
> + E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
> + sc->msix_queuesmask |= que->eims;
> + /* TX entries */
> + index = que->me & 0x7; /* Each IVAR has two entries */
> + ivar = E1000_READ_REG_ARRAY(&sc->hw, IVAR0, index);
> + if (que->me < 8) {
> + ivar &= 0xFFFF00FF;
> + ivar |= (que->me | E1000_IVAR_VALID) << 8;
> + } else {
> + ivar &= 0x00FFFFFF;
> + ivar |= (que->me | E1000_IVAR_VALID) << 24;
> + }
> + E1000_WRITE_REG_ARRAY(&sc->hw, IVAR0, index, ivar);
> + sc->msix_queuesmask |= que->eims;
> +
> + /* And for the link interrupt */
> + ivar = (sc->msix_linkvec | E1000_IVAR_VALID) << 8;
> + sc->msix_linkmask = 1 << sc->msix_linkvec;
> + E1000_WRITE_REG(&sc->hw, IVAR_MISC, ivar);
> + break;
> + default:
> + panic("unsupported mac");
> + break;
> + }
> +
> + /* Set the starting interrupt rate */
> + newitr = (4000000 / MAX_INTS_PER_SEC) & 0x7FFC;
> +
> + if (sc->hw.mac_type == em_82575)
> + newitr |= newitr << 16;
> + else
> + newitr |= E1000_EITR_CNT_IGNR;
> +
> + E1000_WRITE_REG(&sc->hw, EITR(que->me), newitr);
> +
> + return (0);
> +}
> +
> +void
> +em_enable_queue_intr_msix(struct em_queue *que)
> +{
> + E1000_WRITE_REG(&que->sc->hw, EIMS, que->eims);
> +}
>
> int
> em_allocate_desc_rings(struct em_softc *sc)
> Index: dev/pci/if_em.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.h,v
> retrieving revision 1.75
> diff -u -p -r1.75 if_em.h
> --- dev/pci/if_em.h 20 Feb 2020 09:32:49 -0000 1.75
> +++ dev/pci/if_em.h 9 Mar 2020 10:48:15 -0000
> @@ -358,7 +358,9 @@ struct em_softc;
> struct em_queue {
> struct em_softc *sc;
> uint32_t me; /* queue index, also msix vector */
> -
> + uint32_t eims; /* msix only */
> + void *tag; /* NULL in legacy, check sc_intrhand */
> + char name[8];
> struct em_tx tx;
> struct em_rx rx;
>
> @@ -433,6 +435,10 @@ struct em_softc {
> boolean_t pcix_82544;
> struct em_hw_stats stats;
>
> + int msix;
> + uint32_t msix_linkvec;
> + uint32_t msix_linkmask;
> + uint32_t msix_queuesmask;
> int num_queues;
> struct em_queue *queues;
> };
> Index: dev/pci/if_em_hw.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v
> retrieving revision 1.81
> diff -u -p -r1.81 if_em_hw.h
> --- dev/pci/if_em_hw.h 8 Mar 2020 11:43:43 -0000 1.81
> +++ dev/pci/if_em_hw.h 17 Mar 2020 10:58:49 -0000
> @@ -2033,6 +2033,11 @@ struct em_hw {
> #define E1000_RXDCTL_THRESH_UNIT_DESC 0x1000000
> #define E1000_RXDCTL_QUEUE_ENABLE 0x2000000
>
> +#define E1000_EITR_ITR_INT_MASK 0x0000FFFF
> +/* E1000_EITR_CNT_IGNR is only for 82576 and newer */
> +#define E1000_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */
> +#define E1000_EITR_INTERVAL 0x00007FFC
> +
> /* Transmit Descriptor Control */
> #define E1000_TXDCTL_PTHRESH 0x000000FF /* TXDCTL Prefetch Threshold */
> #define E1000_TXDCTL_HTHRESH 0x0000FF00 /* TXDCTL Host Threshold */
> Index: dev/pci/if_ix.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
> retrieving revision 1.161
> diff -u -p -r1.161 if_ix.c
> --- dev/pci/if_ix.c 2 Mar 2020 01:59:01 -0000 1.161
> +++ dev/pci/if_ix.c 17 Mar 2020 11:41:58 -0000
> @@ -114,6 +114,8 @@ int ixgbe_media_change(struct ifnet *);
> void ixgbe_identify_hardware(struct ix_softc *);
> int ixgbe_allocate_pci_resources(struct ix_softc *);
> int ixgbe_allocate_legacy(struct ix_softc *);
> +int ixgbe_allocate_msix(struct ix_softc *);
> +int ixgbe_setup_msix(struct ix_softc *);
> int ixgbe_allocate_queues(struct ix_softc *);
> void ixgbe_free_pci_resources(struct ix_softc *);
> void ixgbe_local_timer(void *);
> @@ -140,6 +142,7 @@ void ixgbe_initialize_rss_mapping(struct
> int ixgbe_rxfill(struct rx_ring *);
> void ixgbe_rxrefill(void *);
>
> +int ixgbe_intr(struct ix_softc *sc);
> void ixgbe_enable_intr(struct ix_softc *);
> void ixgbe_disable_intr(struct ix_softc *);
> void ixgbe_update_stats_counters(struct ix_softc *);
> @@ -172,11 +175,16 @@ void ixgbe_handle_msf(struct ix_softc *)
> void ixgbe_handle_phy(struct ix_softc *);
>
> /* Legacy (single vector interrupt handler */
> -int ixgbe_intr(void *);
> +int ixgbe_legacy_intr(void *);
> void ixgbe_enable_queue(struct ix_softc *, uint32_t);
> +void ixgbe_enable_queues(struct ix_softc *);
> void ixgbe_disable_queue(struct ix_softc *, uint32_t);
> void ixgbe_rearm_queue(struct ix_softc *, uint32_t);
>
> +/* MSI-X (multiple vectors interrupt handlers) */
> +int ixgbe_link_intr(void *);
> +int ixgbe_queue_intr(void *);
> +
> /*********************************************************************
> * OpenBSD Device Interface Entry Points
> *********************************************************************/
> @@ -190,6 +198,7 @@ struct cfattach ix_ca = {
> };
>
> int ixgbe_smart_speed = ixgbe_smart_speed_on;
> +int ixgbe_enable_msix = 1;
>
> /*********************************************************************
> * Device identification routine
> @@ -292,7 +301,10 @@ ixgbe_attach(struct device *parent, stru
> bcopy(sc->hw.mac.addr, sc->arpcom.ac_enaddr,
> IXGBE_ETH_LENGTH_OF_ADDRESS);
>
> - error = ixgbe_allocate_legacy(sc);
> + if (sc->msix > 1)
> + error = ixgbe_allocate_msix(sc);
> + else
> + error = ixgbe_allocate_legacy(sc);
> if (error)
> goto err_late;
>
> @@ -524,6 +536,7 @@ ixgbe_ioctl(struct ifnet * ifp, u_long c
> ixgbe_disable_intr(sc);
> ixgbe_iff(sc);
> ixgbe_enable_intr(sc);
> + ixgbe_enable_queues(sc);
> }
> error = 0;
> }
> @@ -819,6 +832,9 @@ ixgbe_init(void *arg)
> itr |= IXGBE_EITR_LLI_MOD | IXGBE_EITR_CNT_WDIS;
> IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(0), itr);
>
> + /* Set moderation on the Link interrupt */
> + IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(sc->linkvec), IXGBE_LINK_ITR);
> +
> /* Enable power to the phy */
> if (sc->hw.phy.ops.set_phy_power)
> sc->hw.phy.ops.set_phy_power(&sc->hw, TRUE);
> @@ -834,6 +850,7 @@ ixgbe_init(void *arg)
>
> /* And now turn on interrupts */
> ixgbe_enable_intr(sc);
> + ixgbe_enable_queues(sc);
>
> /* Now inform the stack we're ready */
> ifp->if_flags |= IFF_RUNNING;
> @@ -964,6 +981,16 @@ ixgbe_enable_queue(struct ix_softc *sc,
> }
>
> void
> +ixgbe_enable_queues(struct ix_softc *sc)
> +{
> + struct ix_queue *que;
> + int i;
> +
> + for (i = 0, que = sc->queues; i < sc->num_queues; i++, que++)
> + ixgbe_enable_queue(sc, que->msix);
> +}
> +
> +void
> ixgbe_disable_queue(struct ix_softc *sc, uint32_t vector)
> {
> uint64_t queue = 1ULL << vector;
> @@ -982,6 +1009,41 @@ ixgbe_disable_queue(struct ix_softc *sc,
> }
> }
>
> +/*
> + * MSIX Interrupt Handlers
> + */
> +int
> +ixgbe_link_intr(void *vsc)
> +{
> + struct ix_softc *sc = (struct ix_softc *)vsc;
> +
> + return ixgbe_intr(sc);
> +}
> +
> +int
> +ixgbe_queue_intr(void *vque)
> +{
> + struct ix_queue *que = vque;
> + struct ix_softc *sc = que->sc;
> + struct ifnet *ifp = &sc->arpcom.ac_if;
> + struct tx_ring *txr = sc->tx_rings;
> +
> + if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> + ixgbe_rxeof(que);
> + ixgbe_txeof(txr);
> + if (ixgbe_rxfill(que->rxr)) {
> + /* Advance the Rx Queue "Tail Pointer" */
> + IXGBE_WRITE_REG(&sc->hw, IXGBE_RDT(que->rxr->me),
> + que->rxr->last_desc_filled);
> + } else
> + timeout_add(&sc->rx_refill, 1);
> + }
> +
> + ixgbe_enable_queue(sc, que->msix);
> +
> + return (1);
> +}
> +
> /*********************************************************************
> *
> * Legacy Interrupt Service routine
> @@ -989,29 +1051,24 @@ ixgbe_disable_queue(struct ix_softc *sc,
> **********************************************************************/
>
> int
> -ixgbe_intr(void *arg)
> +ixgbe_legacy_intr(void *arg)
> {
> struct ix_softc *sc = (struct ix_softc *)arg;
> - struct ix_queue *que = sc->queues;
> struct ifnet *ifp = &sc->arpcom.ac_if;
> struct tx_ring *txr = sc->tx_rings;
> - struct ixgbe_hw *hw = &sc->hw;
> - uint32_t reg_eicr, mod_mask, msf_mask;
> - int i, refill = 0;
> + int rv;
>
> - reg_eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
> - if (reg_eicr == 0) {
> - ixgbe_enable_intr(sc);
> + rv = ixgbe_intr(sc);
> + if (rv == 0) {
> + ixgbe_enable_queues(sc);
> return (0);
> }
>
> if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> + struct ix_queue *que = sc->queues;
> +
> ixgbe_rxeof(que);
> ixgbe_txeof(txr);
> - refill = 1;
> - }
> -
> - if (refill) {
> if (ixgbe_rxfill(que->rxr)) {
> /* Advance the Rx Queue "Tail Pointer" */
> IXGBE_WRITE_REG(&sc->hw, IXGBE_RDT(que->rxr->me),
> @@ -1020,6 +1077,22 @@ ixgbe_intr(void *arg)
> timeout_add(&sc->rx_refill, 1);
> }
>
> + return (rv);
> +}
> +
> +int
> +ixgbe_intr(struct ix_softc *sc)
> +{
> + struct ifnet *ifp = &sc->arpcom.ac_if;
> + struct ixgbe_hw *hw = &sc->hw;
> + uint32_t reg_eicr, mod_mask, msf_mask;
> +
> + reg_eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
> + if (reg_eicr == 0) {
> + ixgbe_enable_intr(sc);
> + return (0);
> + }
> +
> /* Link status change */
> if (reg_eicr & IXGBE_EICR_LSC) {
> KERNEL_LOCK();
> @@ -1090,9 +1163,6 @@ ixgbe_intr(void *arg)
> KERNEL_UNLOCK();
> }
>
> - for (i = 0; i < sc->num_queues; i++, que++)
> - ixgbe_enable_queue(sc, que->msix);
> -
> return (1);
> }
>
> @@ -1626,7 +1696,7 @@ ixgbe_allocate_legacy(struct ix_softc *s
>
> intrstr = pci_intr_string(pc, ih);
> sc->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> - ixgbe_intr, sc, sc->dev.dv_xname);
> + ixgbe_legacy_intr, sc, sc->dev.dv_xname);
> if (sc->tag == NULL) {
> printf(": couldn't establish interrupt");
> if (intrstr != NULL)
> @@ -1642,6 +1712,92 @@ ixgbe_allocate_legacy(struct ix_softc *s
> return (0);
> }
>
> +/*********************************************************************
> + *
> + * Setup the MSI-X Interrupt handlers
> + *
> + **********************************************************************/
> +int
> +ixgbe_allocate_msix(struct ix_softc *sc)
> +{
> + struct ixgbe_osdep *os = &sc->osdep;
> + struct pci_attach_args *pa = &os->os_pa;
> + int vec, error = 0;
> + struct ix_queue *que = sc->queues;
> + const char *intrstr = NULL;
> + pci_chipset_tag_t pc = pa->pa_pc;
> + pci_intr_handle_t ih;
> +
> + vec = 0;
> + if (pci_intr_map_msix(pa, vec, &ih)) {
> + printf(": couldn't map interrupt\n");
> + return (ENXIO);
> + }
> +
> + que->msix = vec;
> + snprintf(que->name, sizeof(que->name), "%s:%d", sc->dev.dv_xname, vec);
> +
> + intrstr = pci_intr_string(pc, ih);
> + que->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> + ixgbe_queue_intr, que, que->name);
> + if (que->tag == NULL) {
> + printf(": couldn't establish interrupt");
> + if (intrstr != NULL)
> + printf(" at %s", intrstr);
> + printf("\n");
> + return (ENXIO);
> + }
> +
> + /* Now the link status/control last MSI-X vector */
> + vec++;
> + if (pci_intr_map_msix(pa, vec, &ih)) {
> + printf(": couldn't map link vector\n");
> + error = ENXIO;
> + goto fail;
> + }
> +
> + intrstr = pci_intr_string(pc, ih);
> + sc->tag = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE,
> + ixgbe_link_intr, sc, sc->dev.dv_xname);
> + if (sc->tag == NULL) {
> + printf(": couldn't establish interrupt");
> + if (intrstr != NULL)
> + printf(" at %s", intrstr);
> + printf("\n");
> + error = ENXIO;
> + goto fail;
> + }
> +
> + sc->linkvec = vec;
> + printf(", %s, %d queue%s", intrstr, vec, (vec > 1) ? "s" : "");
> +
> + return (0);
> +fail:
> + pci_intr_disestablish(pc, que->tag);
> + que->tag = NULL;
> + return (error);
> +}
> +
> +int
> +ixgbe_setup_msix(struct ix_softc *sc)
> +{
> + struct ixgbe_osdep *os = &sc->osdep;
> + struct pci_attach_args *pa = &os->os_pa;
> + pci_intr_handle_t dummy;
> +
> + if (!ixgbe_enable_msix)
> + return (0);
> +
> + /*
> + * Try a dummy map, maybe this bus doesn't like MSI, this function
> + * has no side effects.
> + */
> + if (pci_intr_map_msix(pa, 0, &dummy))
> + return (0);
> +
> + return (2); /* queue vector + link vector */
> +}
> +
> int
> ixgbe_allocate_pci_resources(struct ix_softc *sc)
> {
> @@ -1666,10 +1822,8 @@ ixgbe_allocate_pci_resources(struct ix_s
> sc->num_queues = 1;
> sc->hw.back = os;
>
> -#ifdef notyet
> /* Now setup MSI or MSI/X, return us the number of supported vectors. */
> sc->msix = ixgbe_setup_msix(sc);
> -#endif
>
> return (0);
> }
> @@ -3085,9 +3239,7 @@ void
> ixgbe_enable_intr(struct ix_softc *sc)
> {
> struct ixgbe_hw *hw = &sc->hw;
> - struct ix_queue *que = sc->queues;
> uint32_t mask, fwsm;
> - int i;
>
> mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
> /* Enable Fan Failure detection */
> @@ -3135,14 +3287,6 @@ ixgbe_enable_intr(struct ix_softc *sc)
> IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
> }
>
> - /*
> - * Now enable all queues, this is done separately to
> - * allow for handling the extended (beyond 32) MSIX
> - * vectors that can be used by 82599
> - */
> - for (i = 0; i < sc->num_queues; i++, que++)
> - ixgbe_enable_queue(sc, que->msix);
> -
> IXGBE_WRITE_FLUSH(hw);
> }
>
> @@ -3258,15 +3402,11 @@ ixgbe_set_ivar(struct ix_softc *sc, uint
> void
> ixgbe_configure_ivars(struct ix_softc *sc)
> {
> -#if notyet
> struct ix_queue *que = sc->queues;
> uint32_t newitr;
> int i;
>
> - if (ixgbe_max_interrupt_rate > 0)
> - newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
> - else
> - newitr = 0;
> + newitr = (4000000 / IXGBE_INTS_PER_SEC) & 0x0FF8;
>
> for (i = 0; i < sc->num_queues; i++, que++) {
> /* First the RX queue entry */
> @@ -3280,7 +3420,6 @@ ixgbe_configure_ivars(struct ix_softc *s
>
> /* For the Link interrupt */
> ixgbe_set_ivar(sc, 1, sc->linkvec, -1);
> -#endif
> }
>
> /*
> Index: dev/pci/if_ix.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_ix.h,v
> retrieving revision 1.37
> diff -u -p -r1.37 if_ix.h
> --- dev/pci/if_ix.h 2 Mar 2020 01:59:01 -0000 1.37
> +++ dev/pci/if_ix.h 2 Mar 2020 09:28:59 -0000
> @@ -120,6 +120,7 @@
> * Interrupt Moderation parameters
> */
> #define IXGBE_INTS_PER_SEC 8000
> +#define IXGBE_LINK_ITR 1000
>
> struct ixgbe_tx_buf {
> uint32_t eop_index;
> @@ -154,6 +155,8 @@ struct ix_queue {
> uint32_t msix; /* This queue's MSIX vector */
> uint32_t eims; /* This queue's EIMS bit */
> uint32_t eitr_setting;
> + char name[8];
> + pci_intr_handle_t ih;
> void *tag;
> struct tx_ring *txr;
> struct rx_ring *rxr;
>