On Mon, Jul 29, 2019 at 12:22:28AM +0200, Olivier Ta??bi wrote:
> On one of my machines equipped with two msk(4) gigabit ethernet devices,
> the reintroduction (rev. 1.32) of counting backpressure in ifiq_input() in
> sys/net/ifq.c causes drops when more than 8 ethernet frames arrive in a short
> amount of time.  I noticed this because this machine is an NFS server, mounted
> with the (client-side) option "-w 32768", but the drops are reproducible
> (visible with netstat -I msk*) simply by sending to the machine a large (e.g.
> 17kB) UDP packet which gets fragmented into >8 IP packets.
> Increasing net.link.ifrxq.pressure_drop (e.g. from the default value 8 to 40)
> suppresses this problem, although I am not sure whether this is the correct
> solution.

i think i see the more fundamental problem. the diff below should fix it
so you don't need to tune the ifrxq pressure levels.

this basically tweaks msk so it enqueues packets for the stack once per
interrupt, rather than once per rx completion event. this helps when you
rx multiple packets per interrupt like you are when a packet gets
fragmented. while here it uses ifiq_input so it can apply earlier
backpressure.

i'll commit this soon, so hopefully it will be in the tree for you to
test.

Index: if_msk.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_msk.c,v
retrieving revision 1.131
diff -u -p -r1.131 if_msk.c
--- if_msk.c    6 Jan 2018 03:11:04 -0000       1.131
+++ if_msk.c    30 Jul 2019 03:59:11 -0000
@@ -134,7 +134,7 @@ int mskcprint(void *, const char *);
 int msk_intr(void *);
 void msk_intr_yukon(struct sk_if_softc *);
 static inline int msk_rxvalid(struct sk_softc *, u_int32_t, u_int32_t);
-void msk_rxeof(struct sk_if_softc *, uint16_t, uint32_t);
+void msk_rxeof(struct sk_if_softc *, struct mbuf_list *, uint16_t, uint32_t);
 void msk_txeof(struct sk_if_softc *);
 static unsigned int msk_encap(struct sk_if_softc *, struct mbuf *, uint32_t);
 void msk_start(struct ifnet *);
@@ -1591,11 +1591,11 @@ msk_rxvalid(struct sk_softc *sc, u_int32
 }
 
 void
-msk_rxeof(struct sk_if_softc *sc_if, uint16_t len, uint32_t rxstat)
+msk_rxeof(struct sk_if_softc *sc_if, struct mbuf_list *ml,
+    uint16_t len, uint32_t rxstat)
 {
        struct sk_softc         *sc = sc_if->sk_softc;
        struct ifnet            *ifp = &sc_if->arpcom.ac_if;
-       struct mbuf_list        ml = MBUF_LIST_INITIALIZER();
        struct mbuf             *m = NULL;
        int                     prod, cons, tail;
        bus_dmamap_t            map;
@@ -1640,8 +1640,7 @@ msk_rxeof(struct sk_if_softc *sc_if, uin
 
        m->m_pkthdr.len = m->m_len = len;
 
-       ml_enqueue(&ml, m);
-       if_input(ifp, &ml);
+       ml_enqueue(ml, m);
 }
 
 void
@@ -1770,8 +1769,12 @@ msk_intr(void *xsc)
        struct sk_if_softc      *sc_if;
        struct sk_if_softc      *sc_if0 = sc->sk_if[SK_PORT_A];
        struct sk_if_softc      *sc_if1 = sc->sk_if[SK_PORT_B];
+       struct mbuf_list        ml[2] = {
+                                       MBUF_LIST_INITIALIZER(),
+                                       MBUF_LIST_INITIALIZER(),
+                               };
        struct ifnet            *ifp0 = NULL, *ifp1 = NULL;
-       int                     claimed = 0, rx[2] = {0, 0};
+       int                     claimed = 0;
        u_int32_t               status;
        struct msk_status_desc  *cur_st;
 
@@ -1809,8 +1812,8 @@ msk_intr(void *xsc)
                switch (cur_st->sk_opcode) {
                case SK_Y2_STOPC_RXSTAT:
                        sc_if = sc->sk_if[cur_st->sk_link & 0x01];
-                       rx[cur_st->sk_link & 0x01] = 1;
-                       msk_rxeof(sc_if, lemtoh16(&cur_st->sk_len),
+                       msk_rxeof(sc_if, &ml[cur_st->sk_link & 0x01],
+                           lemtoh16(&cur_st->sk_len),
                            lemtoh32(&cur_st->sk_status));
                        break;
                case SK_Y2_STOPC_TXSTAT:
@@ -1837,12 +1840,16 @@ msk_intr(void *xsc)
 
        CSR_WRITE_4(sc, SK_Y2_ICR, 2);
 
-       if (rx[0]) {
+       if (!ml_empty(&ml[0])) {
+               if (ifiq_input(&ifp0->if_rcv, &ml[0]))
+                       if_rxr_livelocked(&sc_if0->sk_cdata.sk_rx_ring);
                msk_fill_rx_ring(sc_if0);
                SK_IF_WRITE_2(sc_if0, 0, SK_RXQ1_Y2_PREF_PUTIDX,
                    sc_if0->sk_cdata.sk_rx_prod);
        }
-       if (rx[1]) {
+       if (!ml_empty(&ml[1])) {
+               if (ifiq_input(&ifp1->if_rcv, &ml[1]))
+                       if_rxr_livelocked(&sc_if1->sk_cdata.sk_rx_ring);
                msk_fill_rx_ring(sc_if1);
                SK_IF_WRITE_2(sc_if1, 0, SK_RXQ1_Y2_PREF_PUTIDX,
                    sc_if1->sk_cdata.sk_rx_prod);

Reply via email to