Re: em(4) towards multiqueues

2020-02-18 Thread Martin Pieuchot
On 18/02/20(Tue) 22:39, Jonathan Matthew wrote:
> On Fri, Feb 14, 2020 at 06:28:20PM +0100, Martin Pieuchot wrote:
> > @@ -2597,13 +2635,6 @@ em_initialize_receive_unit(struct em_sof
> > E1000_WRITE_REG(>hw, ITR, DEFAULT_ITR);
> > }
> >  
> > -   /* Setup the Base and Length of the Rx Descriptor Ring */
> > -   bus_addr = sc->sc_rx_dma.dma_map->dm_segs[0].ds_addr;
> > -   E1000_WRITE_REG(>hw, RDLEN(0),
> > -   sc->sc_rx_slots * sizeof(*sc->sc_rx_desc_ring));
> > -   E1000_WRITE_REG(>hw, RDBAH(0), (u_int32_t)(bus_addr >> 32));
> > -   E1000_WRITE_REG(>hw, RDBAL(0), (u_int32_t)bus_addr);
> > -
> > /* Setup the Receive Control Register */
> > reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
> > E1000_RCTL_RDMTS_HALF |
> > @@ -2653,6 +2684,13 @@ em_initialize_receive_unit(struct em_sof
> > if (sc->hw.mac_type == em_82573)
> > E1000_WRITE_REG(>hw, RDTR, 0x20);
> >  
> > +   /* Setup the Base and Length of the Rx Descriptor Ring */
> > +   bus_addr = que->rx.sc_rx_dma.dma_map->dm_segs[0].ds_addr;
> > +   E1000_WRITE_REG(>hw, RDLEN(0),
> > +   sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
> > +   E1000_WRITE_REG(>hw, RDBAH(0), (u_int32_t)(bus_addr >> 32));
> > +   E1000_WRITE_REG(>hw, RDBAL(0), (u_int32_t)bus_addr);
> > +
> 
> Just to make sure that I follow how this works, should we be setting
> RDLEN(que->me) rather than RDLEN(0) here, and likewise for RDBAH and RDBAL?
> This value is always 0 at this stage, so of course it doesn't matter yet.

Nice catch!  Diff below fixes these :o)

Index: dev/pci/if_em.c
===
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
retrieving revision 1.344
diff -u -p -r1.344 if_em.c
--- dev/pci/if_em.c 4 Feb 2020 10:59:23 -   1.344
+++ dev/pci/if_em.c 18 Feb 2020 13:04:19 -
@@ -257,25 +257,25 @@ void em_free_transmit_structures(struct 
 void em_free_receive_structures(struct em_softc *);
 void em_update_stats_counters(struct em_softc *);
 void em_disable_aspm(struct em_softc *);
-void em_txeof(struct em_softc *);
+void em_txeof(struct em_queue *);
 int  em_allocate_receive_structures(struct em_softc *);
 int  em_allocate_transmit_structures(struct em_softc *);
 int  em_allocate_desc_rings(struct em_softc *);
-int  em_rxfill(struct em_softc *);
+int  em_rxfill(struct em_queue *);
 void em_rxrefill(void *);
-int  em_rxeof(struct em_softc *);
+int  em_rxeof(struct em_queue *);
 void em_receive_checksum(struct em_softc *, struct em_rx_desc *,
 struct mbuf *);
-u_int  em_transmit_checksum_setup(struct em_softc *, struct mbuf *, u_int,
+u_int  em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
u_int32_t *, u_int32_t *);
 void em_iff(struct em_softc *);
 #ifdef EM_DEBUG
 void em_print_hw_stats(struct em_softc *);
 #endif
 void em_update_link_status(struct em_softc *);
-int  em_get_buf(struct em_softc *, int);
+int  em_get_buf(struct em_queue *, int);
 void em_enable_hw_vlans(struct em_softc *);
-u_int em_encap(struct em_softc *, struct mbuf *);
+u_int em_encap(struct em_queue *, struct mbuf *);
 void em_smartspeed(struct em_softc *);
 int  em_82547_fifo_workaround(struct em_softc *, int);
 void em_82547_update_fifo_head(struct em_softc *, int);
@@ -286,8 +286,8 @@ int  em_dma_malloc(struct em_softc *, bu
 void em_dma_free(struct em_softc *, struct em_dma_alloc *);
 u_int32_t em_fill_descriptors(u_int64_t address, u_int32_t length,
  PDESC_ARRAY desc_array);
-void em_flush_tx_ring(struct em_softc *);
-void em_flush_rx_ring(struct em_softc *);
+void em_flush_tx_ring(struct em_queue *);
+void em_flush_rx_ring(struct em_queue *);
 void em_flush_desc_rings(struct em_softc *);
 
 /*
@@ -383,7 +383,6 @@ em_attach(struct device *parent, struct 
 
timeout_set(>timer_handle, em_local_timer, sc);
timeout_set(>tx_fifo_timer_handle, em_82547_move_tail, sc);
-   timeout_set(>rx_refill, em_rxrefill, sc);
 
/* Determine hardware revision */
em_identify_hardware(sc);
@@ -601,6 +600,7 @@ em_start(struct ifqueue *ifq)
u_int head, free, used;
struct mbuf *m;
int post = 0;
+   struct em_queue *que = sc->queues; /* Use only first queue. */
 
if (!sc->link_active) {
ifq_purge(ifq);
@@ -608,15 +608,15 @@ em_start(struct ifqueue *ifq)
}
 
/* calculate free space */
-   head = sc->sc_tx_desc_head;
-   free = sc->sc_tx_desc_tail;
+   head = que->tx.sc_tx_desc_head;
+   free = que->tx.sc_tx_desc_tail;
if (free <= head)
free += sc->sc_tx_slots;
free -= head;
 
if (sc->hw.mac_type != em_82547) {
-   bus_dmamap_sync(sc->sc_dmat, sc->sc_tx_dma.dma_map,
-   0, sc->sc_tx_dma.dma_map->dm_mapsize,
+   bus_dmamap_sync(sc->sc_dmat, 

Re: em(4) towards multiqueues

2020-02-18 Thread Jonathan Matthew
On Fri, Feb 14, 2020 at 06:28:20PM +0100, Martin Pieuchot wrote:
> This diff introduces the concept of "queue" in the em(4) driver.  The
> logic present in ix(4) has been matched for coherency.
> 
> Currently the driver uses a single queue and the diff below doesn't
> change anything in that regard.  It can be viewed as the introduction
> of an abstraction.
> 
> I'd like to get this in to reduce the differences between em(4) and
> ix(4) when it comes to multiqueues.  My intend is to keep the upcoming
> changes as small as possible such that we can concentrate our efforts on
> the important bits.  So far we started discussing the interaction between
> CPU and mapping MSIX vectors but other related topics will appear :o)
> 
> I'm running this on:
> 
>   em0 at pci1 dev 0 function 0 "Intel I210" rev 0x03: msi
>   em0 at pci0 dev 20 function 0 "Intel I354 SGMII" rev 0x03: msi
> 
> More tests are always welcome ;)

I can test this on some other hardware tomorrow.

I read through it and it all makes sense to me.  One question below,
but ok jmatthew@

> @@ -2597,13 +2635,6 @@ em_initialize_receive_unit(struct em_sof
>   E1000_WRITE_REG(>hw, ITR, DEFAULT_ITR);
>   }
>  
> - /* Setup the Base and Length of the Rx Descriptor Ring */
> - bus_addr = sc->sc_rx_dma.dma_map->dm_segs[0].ds_addr;
> - E1000_WRITE_REG(>hw, RDLEN(0),
> - sc->sc_rx_slots * sizeof(*sc->sc_rx_desc_ring));
> - E1000_WRITE_REG(>hw, RDBAH(0), (u_int32_t)(bus_addr >> 32));
> - E1000_WRITE_REG(>hw, RDBAL(0), (u_int32_t)bus_addr);
> -
>   /* Setup the Receive Control Register */
>   reg_rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
>   E1000_RCTL_RDMTS_HALF |
> @@ -2653,6 +2684,13 @@ em_initialize_receive_unit(struct em_sof
>   if (sc->hw.mac_type == em_82573)
>   E1000_WRITE_REG(>hw, RDTR, 0x20);
>  
> + /* Setup the Base and Length of the Rx Descriptor Ring */
> + bus_addr = que->rx.sc_rx_dma.dma_map->dm_segs[0].ds_addr;
> + E1000_WRITE_REG(>hw, RDLEN(0),
> + sc->sc_rx_slots * sizeof(*que->rx.sc_rx_desc_ring));
> + E1000_WRITE_REG(>hw, RDBAH(0), (u_int32_t)(bus_addr >> 32));
> + E1000_WRITE_REG(>hw, RDBAL(0), (u_int32_t)bus_addr);
> +

Just to make sure that I follow how this works, should we be setting
RDLEN(que->me) rather than RDLEN(0) here, and likewise for RDBAH and RDBAL?
This value is always 0 at this stage, so of course it doesn't matter yet.



Re: em(4) towards multiqueues

2020-02-16 Thread Hrvoje Popovski
On 14.2.2020. 18:28, Martin Pieuchot wrote:
> I'm running this on:
> 
>   em0 at pci1 dev 0 function 0 "Intel I210" rev 0x03: msi
>   em0 at pci0 dev 20 function 0 "Intel I354 SGMII" rev 0x03: msi
> 
> More tests are always welcome ;)


em0 at pci0 dev 25 function 0 "Intel 82579LM" rev 0x04: msi
em1 at pci1 dev 0 function 1 "Intel 82571EB" rev 0x06: apic 0 int 17
em4 at pci2 dev 0 function 1 "Intel 82576" rev 0x01: msi
em5 at pci3 dev 0 function 0 "Intel 82572EI" rev 0x06: apic 0 int 16
em0 at pci9 dev 0 function 0 "Intel I350" rev 0x01: msi

it just works .. :)