Re: Unlocking ix(4) a bit further

2015-11-28 Thread Hrvoje Popovski
On 16.10.2015. 20:30, Mark Kettenis wrote:
>> Date: Fri, 16 Oct 2015 14:13:52 +0200
>> From: Martin Pieuchot 
>>
>> On 08/10/15(Thu) 20:49, Mark Kettenis wrote:
 Date: Wed, 30 Sep 2015 14:30:11 +0200 (CEST)
 From: Mark Kettenis 

 Since people seemed to like my diff for em(4), here is one for ix(4).
 In addition to unlocking the rx completion path, this one also uses
 intr_barrier() and removes the rx mutex that claudio@ put in recently.

 I don't have any ix(4) hardware.  So the only guarantee is that this
 compiles ;).
>>>
>>> Based on the experience with em(4), here is an updated diff.
>>
>> What's happening to this?  Mark did you get any test report?
> 
> Got a report from Hrvoje Popovski that this triggers OACTIVE a lot and
> sometimes even gets stuck completely.

Hi,

i tried to apply this patch to today's current and there are some
errors. could it be because of dlg@ IFF_OACTIVE mpsafe patch?

File to patch: /usr/src/sys/dev/pci/if_ix.c
Patching file /usr/src/sys/dev/pci/if_ix.c using Plan A...
Hunk #1 succeeded at 210.
Hunk #2 succeeded at 525 (offset -2 lines).
Hunk #3 succeeded at 857 (offset -2 lines).
Hunk #4 failed at 867.
Hunk #5 succeeded at 884 (offset -2 lines).
Hunk #6 succeeded at 907 (offset -2 lines).
Hunk #7 succeeded at 1049 (offset -2 lines).
Hunk #8 succeeded at 1115 (offset -2 lines).
Hunk #9 succeeded at 1132 (offset -2 lines).
Hunk #10 succeeded at 1307 (offset 1 line).
Hunk #11 succeeded at 2188 (offset -2 lines).
Hunk #12 succeeded at 2311 (offset 1 line).
Hunk #13 succeeded at 2335 (offset -2 lines).
Hunk #14 succeeded at 2350 (offset 1 line).
Hunk #15 succeeded at 2381 (offset -2 lines).
Hunk #16 failed at 2422.
Hunk #17 succeeded at 2592 (offset 1 line).
Hunk #18 succeeded at 2604 (offset -2 lines).
Hunk #19 succeeded at 2774 (offset 1 line).
Hunk #20 succeeded at 2826 (offset -2 lines).
Hunk #21 succeeded at 2841 (offset 1 line).
Hunk #22 succeeded at 2872 (offset -2 lines).
Hunk #23 failed at 2954.
3 out of 23 hunks failed--saving rejects to /usr/src/sys/dev/pci/if_ix.c.rej



# cat /usr/src/sys/dev/pci/if_ix.c.rej
@@ -872,11 +867,8 @@

if (ISSET(ifp->if_flags, IFF_RUNNING)) {
ixgbe_rxeof(que);
-   refill = 1;
-
-   if (ISSET(ifp->if_flags, IFF_OACTIVE))
-   was_active = 1;
ixgbe_txeof(txr);
+   refill = 1;
}

if (refill) {
@@ -2435,31 +2422,24 @@

txr->next_to_clean = first;

+   num_avail = atomic_add_int_nv(>tx_avail, processed);
+
+   /* All clean, turn off the timer */
+   if (num_avail == sc->num_tx_desc)
+   ifp->if_timer = 0;
+
/*
 * If we have enough room, clear IFF_OACTIVE to tell the stack that
-* it is OK to send packets. If there are no pending descriptors,
-* clear the timeout. Otherwise, if some descriptors have been
freed,
-* restart the timeout.
+* it is OK to send packets.
 */
-   if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD) {
-   ifp->if_flags &= ~IFF_OACTIVE;
-
-   /* If all are clean turn off the timer */
-   if (txr->tx_avail == sc->num_tx_desc) {
-   ifp->if_timer = 0;
-   txr->watchdog_timer = 0;
-   KERNEL_UNLOCK();
-   return FALSE;
-   }
-   /* Some were cleaned, so reset timer */
-   else if (processed) {
-   ifp->if_timer = IXGBE_TX_TIMEOUT;
-   txr->watchdog_timer = IXGBE_TX_TIMEOUT;
-   }
+   if (ISSET(ifp->if_flags, IFF_OACTIVE) &&
+   num_avail > IXGBE_TX_OP_THRESHOLD) {
+   KERNEL_LOCK();
+   CLR(ifp->if_flags, IFF_OACTIVE);
+   ixgbe_start(ifp);
+   KERNEL_UNLOCK();
}

-   KERNEL_UNLOCK();
-
return TRUE;
 }

@@ -2982,10 +2954,6 @@
i = 0;
}
rxr->next_to_check = i;
-   mtx_leave(>rx_mtx);
-
-   while ((mp = ml_dequeue(_ml)))
-   m_freem(mp);

if_input(ifp, );






Re: Unlocking ix(4) a bit further

2015-10-20 Thread Hrvoje Popovski
On 16.10.2015. 20:30, Mark Kettenis wrote:
>> Date: Fri, 16 Oct 2015 14:13:52 +0200
>> From: Martin Pieuchot 
>>
>> On 08/10/15(Thu) 20:49, Mark Kettenis wrote:
 Date: Wed, 30 Sep 2015 14:30:11 +0200 (CEST)
 From: Mark Kettenis 

 Since people seemed to like my diff for em(4), here is one for ix(4).
 In addition to unlocking the rx completion path, this one also uses
 intr_barrier() and removes the rx mutex that claudio@ put in recently.

 I don't have any ix(4) hardware.  So the only guarantee is that this
 compiles ;).
>>>
>>> Based on the experience with em(4), here is an updated diff.
>>
>> What's happening to this?  Mark did you get any test report?
> 
> Got a report from Hrvoje Popovski that this triggers OACTIVE a lot and
> sometimes even gets stuck completely.

Hi all,

i'm confirming what Mark said. I must admit that this traffic is
synthetic and maybe too aggressive for normal scenario.

With if_ix.c rev 1.125 i can't trigger OACTIVE flag, box is stable but i
can trigger
ix1: unable to fill any rx descriptors
ix1: Could not setup receive structures
I'm sorry that i haven't seen this with revision 1.125.


I will try to describe what I'm seeing with Marks patch.

traffic is routed between ix interfaces
sender is connected to ix1 (82599)
receiver is connected to ix0 (82599)

pf=NO
ddb.console=1
kern.pool_debug=1
net.inet.ip.forwarding=1
net.inet.ip.ifq.maxlen=1024

box is freshly rebooted and i'm generating cca 10Mpps
box is unstable and on receiver pps vary from 180kpps to 1.1Mpps (i
never seen 2Mpps on receiver)

# netstat -i 1
  em0 inem0 out  total in  total out
 packets  errs  packets  errs colls   packets  errs  packets  errs colls
 833 0  649 0 0  259125144 0 146427665 0 0
   2 02 0 0   1035261 0   341645 0 0
   1 01 0 0979946 0   177223 0 0
   1 02 0 0   1451247 0  1287839 0 0
   2 01 0 0   1095354 0   847450 0 0
   3 01 0 0985081 0   180134 0 0
   1 01 0 0   1034061 0   370778 0 0
   2 02 0 0   2182566 0  1420698 0 0
   1 01 0 0   1137561 0  1115412 0 0
   2 01 0 0   1012692 0   257518 0 0
   3 01 0 0965894 0   180484 0 0
   3 02 0 0988832 0   179729 0 0
   2 01 0 0959809 0   178349 0 0
   2 01 0 0   1003070 0   179792 0 0
   1 01 0 0989400 0   179187 0 0
   1 01 0 0   1007894 0   182429 0 0
   1 01 0 0   2211264 0  1455209 0 0
   1 01 0 0   2161230 0  2122498 0 0
   2 01 0 0   1136320 0  1078249 0 0
   1 01 0 0   1046122 0   486958 0 0
   2 02 0 0994789 0   177598 0 0


doing ifconfig ix0 down while generating traffic makes box really
unstable and sometimes breaks ssh session.  
after ifconfig ix0 down/up while generating traffic i'm getting stable
1Mpps, box is responsive and everything is fine ...

# netstat -i 1
  em0 inem0 out  total in  total out
 packets  errs  packets  errs colls   packets  errs  packets  errs colls
2348 0 1714 0 0  1102233921 0 58906 0 0
   2 03 0 0   1145492 0  1135339 0 0
   1 01 0 0   1526533 0  1492768 0 0
   3 03 0 0   1109545 0  1056238 0 0
   2 02 0 0   1255325 0  1250910 0 0
   2 02 0 0   1557626 0  1527923 0 0
   2 02 0 0   1136320 0  1113975 0 0
   2 02 0 0   1476924 0  1442136 0 0
   2 02 0 0   2248410 0  2203062 0 0
   2 02 0 0   2189965 0  2145617 0 0
   2 02 0 0   1786688 0  1740730 0 0
   2 02 0 0   2090607 0  2045849 0 0
   2 02 0 0   2151949 0  2110065 0 0
   3 04 0 0   1613589 0  1576791 0 0
   2 02 0 0   1958826 0  1915090 0 0
   2 02 0 0   1843465 0  1800886 0 0
   2 02 0 0   1107742 0  1054389 0 0
   3 02 0 0   1227703 0  

Re: Unlocking ix(4) a bit further

2015-10-16 Thread Mark Kettenis
> Date: Fri, 16 Oct 2015 14:13:52 +0200
> From: Martin Pieuchot 
> 
> On 08/10/15(Thu) 20:49, Mark Kettenis wrote:
> > > Date: Wed, 30 Sep 2015 14:30:11 +0200 (CEST)
> > > From: Mark Kettenis 
> > > 
> > > Since people seemed to like my diff for em(4), here is one for ix(4).
> > > In addition to unlocking the rx completion path, this one also uses
> > > intr_barrier() and removes the rx mutex that claudio@ put in recently.
> > > 
> > > I don't have any ix(4) hardware.  So the only guarantee is that this
> > > compiles ;).
> > 
> > Based on the experience with em(4), here is an updated diff.
> 
> What's happening to this?  Mark did you get any test report?

Got a report from Hrvoje Popovski that this triggers OACTIVE a lot and
sometimes even gets stuck completely.

> > Index: if_ix.c
> > ===
> > RCS file: /home/cvs/src/sys/dev/pci/if_ix.c,v
> > retrieving revision 1.125
> > diff -u -p -r1.125 if_ix.c
> > --- if_ix.c 11 Sep 2015 12:09:10 -  1.125
> > +++ if_ix.c 8 Oct 2015 18:43:36 -
> > @@ -210,8 +210,6 @@ ixgbe_attach(struct device *parent, stru
> > sc->osdep.os_sc = sc;
> > sc->osdep.os_pa = *pa;
> >  
> > -   mtx_init(>rx_mtx, IPL_NET);
> > -
> > /* Set up the timer callout */
> > timeout_set(>timer, ixgbe_local_timer, sc);
> > timeout_set(>rx_refill, ixgbe_rxrefill, sc);
> > @@ -529,10 +527,7 @@ ixgbe_watchdog(struct ifnet * ifp)
> >  
> > /*
> >  * The timer is set to 5 every time ixgbe_start() queues a packet.
> > -* Then ixgbe_txeof() keeps resetting to 5 as long as it cleans at
> > -* least one descriptor.
> > -* Finally, anytime all descriptors are clean the timer is
> > -* set to 0.
> > +* Anytime all descriptors are clean the timer is set to 0.
> >  */
> > for (i = 0; i < sc->num_queues; i++, txr++) {
> > if (txr->watchdog_timer == 0 || --txr->watchdog_timer)
> > @@ -864,7 +859,7 @@ ixgbe_intr(void *arg)
> > struct tx_ring  *txr = sc->tx_rings;
> > struct ixgbe_hw *hw = >hw;
> > uint32_t reg_eicr;
> > -   int  i, refill = 0, was_active = 0;
> > +   int  i, refill = 0;
> >  
> > reg_eicr = IXGBE_READ_REG(>hw, IXGBE_EICR);
> > if (reg_eicr == 0) {
> > @@ -874,11 +869,8 @@ ixgbe_intr(void *arg)
> >  
> > if (ISSET(ifp->if_flags, IFF_RUNNING)) {
> > ixgbe_rxeof(que);
> > -   refill = 1;
> > -
> > -   if (ISSET(ifp->if_flags, IFF_OACTIVE))
> > -   was_active = 1;
> > ixgbe_txeof(txr);
> > +   refill = 1;
> > }
> >  
> > if (refill) {
> > @@ -894,6 +886,8 @@ ixgbe_intr(void *arg)
> > if (reg_eicr & IXGBE_EICR_LSC) {
> > KERNEL_LOCK();
> > ixgbe_update_link_status(sc);
> > +   if (!IFQ_IS_EMPTY(>if_snd))
> > +   ixgbe_start(ifp);
> > KERNEL_UNLOCK();
> > }
> >  
> > @@ -915,12 +909,6 @@ ixgbe_intr(void *arg)
> > }
> > }
> >  
> > -   if (was_active) {
> > -   KERNEL_LOCK();
> > -   ixgbe_start(ifp);
> > -   KERNEL_UNLOCK();
> > -   }
> > -
> > /* Check for fan failure */
> > if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
> > (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
> > @@ -1063,17 +1051,9 @@ ixgbe_encap(struct tx_ring *txr, struct 
> > cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
> >  #endif
> >  
> > -   /*
> > -* Force a cleanup if number of TX descriptors
> > -* available is below the threshold. If it fails
> > -* to get above, then abort transmit.
> > -*/
> > -   if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD) {
> > -   ixgbe_txeof(txr);
> > -   /* Make sure things have improved */
> > -   if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
> > -   return (ENOBUFS);
> > -   }
> > +   /* Check that we have least the minimal number of TX descriptors. */
> > +   if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
> > +   return (ENOBUFS);
> >  
> > /*
> >  * Important to capture the first descriptor
> > @@ -1137,8 +1117,6 @@ ixgbe_encap(struct tx_ring *txr, struct 
> >  
> > txd->read.cmd_type_len |=
> > htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
> > -   txr->tx_avail -= map->dm_nsegs;
> > -   txr->next_avail_desc = i;
> >  
> > txbuf->m_head = m_head;
> > /*
> > @@ -1156,6 +1134,11 @@ ixgbe_encap(struct tx_ring *txr, struct 
> > txbuf = >tx_buffers[first];
> > txbuf->eop_index = last;
> >  
> > +   membar_producer();
> > +
> > +   atomic_sub_int(>tx_avail, map->dm_nsegs);
> > +   txr->next_avail_desc = i;
> > +
> > ++txr->tx_packets;
> > return (0);
> >  
> > @@ -1323,6 +1306,10 @@ ixgbe_stop(void *arg)
> > /* reprogram the RAR[0] in case user changed it. */
> > ixgbe_set_rar(>hw, 0, sc->hw.mac.addr, 0, IXGBE_RAH_AV);
> >  
> > +   intr_barrier(sc->tag);
> > +
> > + 

Re: Unlocking ix(4) a bit further

2015-10-16 Thread Martin Pieuchot
On 08/10/15(Thu) 20:49, Mark Kettenis wrote:
> > Date: Wed, 30 Sep 2015 14:30:11 +0200 (CEST)
> > From: Mark Kettenis 
> > 
> > Since people seemed to like my diff for em(4), here is one for ix(4).
> > In addition to unlocking the rx completion path, this one also uses
> > intr_barrier() and removes the rx mutex that claudio@ put in recently.
> > 
> > I don't have any ix(4) hardware.  So the only guarantee is that this
> > compiles ;).
> 
> Based on the experience with em(4), here is an updated diff.

What's happening to this?  Mark did you get any test report?

> 
> Index: if_ix.c
> ===
> RCS file: /home/cvs/src/sys/dev/pci/if_ix.c,v
> retrieving revision 1.125
> diff -u -p -r1.125 if_ix.c
> --- if_ix.c   11 Sep 2015 12:09:10 -  1.125
> +++ if_ix.c   8 Oct 2015 18:43:36 -
> @@ -210,8 +210,6 @@ ixgbe_attach(struct device *parent, stru
>   sc->osdep.os_sc = sc;
>   sc->osdep.os_pa = *pa;
>  
> - mtx_init(>rx_mtx, IPL_NET);
> -
>   /* Set up the timer callout */
>   timeout_set(>timer, ixgbe_local_timer, sc);
>   timeout_set(>rx_refill, ixgbe_rxrefill, sc);
> @@ -529,10 +527,7 @@ ixgbe_watchdog(struct ifnet * ifp)
>  
>   /*
>* The timer is set to 5 every time ixgbe_start() queues a packet.
> -  * Then ixgbe_txeof() keeps resetting to 5 as long as it cleans at
> -  * least one descriptor.
> -  * Finally, anytime all descriptors are clean the timer is
> -  * set to 0.
> +  * Anytime all descriptors are clean the timer is set to 0.
>*/
>   for (i = 0; i < sc->num_queues; i++, txr++) {
>   if (txr->watchdog_timer == 0 || --txr->watchdog_timer)
> @@ -864,7 +859,7 @@ ixgbe_intr(void *arg)
>   struct tx_ring  *txr = sc->tx_rings;
>   struct ixgbe_hw *hw = >hw;
>   uint32_t reg_eicr;
> - int  i, refill = 0, was_active = 0;
> + int  i, refill = 0;
>  
>   reg_eicr = IXGBE_READ_REG(>hw, IXGBE_EICR);
>   if (reg_eicr == 0) {
> @@ -874,11 +869,8 @@ ixgbe_intr(void *arg)
>  
>   if (ISSET(ifp->if_flags, IFF_RUNNING)) {
>   ixgbe_rxeof(que);
> - refill = 1;
> -
> - if (ISSET(ifp->if_flags, IFF_OACTIVE))
> - was_active = 1;
>   ixgbe_txeof(txr);
> + refill = 1;
>   }
>  
>   if (refill) {
> @@ -894,6 +886,8 @@ ixgbe_intr(void *arg)
>   if (reg_eicr & IXGBE_EICR_LSC) {
>   KERNEL_LOCK();
>   ixgbe_update_link_status(sc);
> + if (!IFQ_IS_EMPTY(>if_snd))
> + ixgbe_start(ifp);
>   KERNEL_UNLOCK();
>   }
>  
> @@ -915,12 +909,6 @@ ixgbe_intr(void *arg)
>   }
>   }
>  
> - if (was_active) {
> - KERNEL_LOCK();
> - ixgbe_start(ifp);
> - KERNEL_UNLOCK();
> - }
> -
>   /* Check for fan failure */
>   if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
>   (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
> @@ -1063,17 +1051,9 @@ ixgbe_encap(struct tx_ring *txr, struct 
>   cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
>  #endif
>  
> - /*
> -  * Force a cleanup if number of TX descriptors
> -  * available is below the threshold. If it fails
> -  * to get above, then abort transmit.
> -  */
> - if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD) {
> - ixgbe_txeof(txr);
> - /* Make sure things have improved */
> - if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
> - return (ENOBUFS);
> - }
> + /* Check that we have least the minimal number of TX descriptors. */
> + if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
> + return (ENOBUFS);
>  
>   /*
>* Important to capture the first descriptor
> @@ -1137,8 +1117,6 @@ ixgbe_encap(struct tx_ring *txr, struct 
>  
>   txd->read.cmd_type_len |=
>   htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
> - txr->tx_avail -= map->dm_nsegs;
> - txr->next_avail_desc = i;
>  
>   txbuf->m_head = m_head;
>   /*
> @@ -1156,6 +1134,11 @@ ixgbe_encap(struct tx_ring *txr, struct 
>   txbuf = >tx_buffers[first];
>   txbuf->eop_index = last;
>  
> + membar_producer();
> +
> + atomic_sub_int(>tx_avail, map->dm_nsegs);
> + txr->next_avail_desc = i;
> +
>   ++txr->tx_packets;
>   return (0);
>  
> @@ -1323,6 +1306,10 @@ ixgbe_stop(void *arg)
>   /* reprogram the RAR[0] in case user changed it. */
>   ixgbe_set_rar(>hw, 0, sc->hw.mac.addr, 0, IXGBE_RAH_AV);
>  
> + intr_barrier(sc->tag);
> +
> + KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
> +
>   /* Should we really clear all structures on stop? */
>   ixgbe_free_transmit_structures(sc);
>   ixgbe_free_receive_structures(sc);
> @@ -2203,11 +2190,13 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, 
>   tx_buffer->m_head 

Re: Unlocking ix(4) a bit further

2015-10-08 Thread Mark Kettenis
> Date: Wed, 30 Sep 2015 14:30:11 +0200 (CEST)
> From: Mark Kettenis 
> 
> Since people seemed to like my diff for em(4), here is one for ix(4).
> In addition to unlocking the rx completion path, this one also uses
> intr_barrier() and removes the rx mutex that claudio@ put in recently.
> 
> I don't have any ix(4) hardware.  So the only guarantee is that this
> compiles ;).

Based on the experience with em(4), here is an updated diff.

Index: if_ix.c
===
RCS file: /home/cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.125
diff -u -p -r1.125 if_ix.c
--- if_ix.c 11 Sep 2015 12:09:10 -  1.125
+++ if_ix.c 8 Oct 2015 18:43:36 -
@@ -210,8 +210,6 @@ ixgbe_attach(struct device *parent, stru
sc->osdep.os_sc = sc;
sc->osdep.os_pa = *pa;
 
-   mtx_init(>rx_mtx, IPL_NET);
-
/* Set up the timer callout */
timeout_set(>timer, ixgbe_local_timer, sc);
timeout_set(>rx_refill, ixgbe_rxrefill, sc);
@@ -529,10 +527,7 @@ ixgbe_watchdog(struct ifnet * ifp)
 
/*
 * The timer is set to 5 every time ixgbe_start() queues a packet.
-* Then ixgbe_txeof() keeps resetting to 5 as long as it cleans at
-* least one descriptor.
-* Finally, anytime all descriptors are clean the timer is
-* set to 0.
+* Anytime all descriptors are clean the timer is set to 0.
 */
for (i = 0; i < sc->num_queues; i++, txr++) {
if (txr->watchdog_timer == 0 || --txr->watchdog_timer)
@@ -864,7 +859,7 @@ ixgbe_intr(void *arg)
struct tx_ring  *txr = sc->tx_rings;
struct ixgbe_hw *hw = >hw;
uint32_t reg_eicr;
-   int  i, refill = 0, was_active = 0;
+   int  i, refill = 0;
 
reg_eicr = IXGBE_READ_REG(>hw, IXGBE_EICR);
if (reg_eicr == 0) {
@@ -874,11 +869,8 @@ ixgbe_intr(void *arg)
 
if (ISSET(ifp->if_flags, IFF_RUNNING)) {
ixgbe_rxeof(que);
-   refill = 1;
-
-   if (ISSET(ifp->if_flags, IFF_OACTIVE))
-   was_active = 1;
ixgbe_txeof(txr);
+   refill = 1;
}
 
if (refill) {
@@ -894,6 +886,8 @@ ixgbe_intr(void *arg)
if (reg_eicr & IXGBE_EICR_LSC) {
KERNEL_LOCK();
ixgbe_update_link_status(sc);
+   if (!IFQ_IS_EMPTY(>if_snd))
+   ixgbe_start(ifp);
KERNEL_UNLOCK();
}
 
@@ -915,12 +909,6 @@ ixgbe_intr(void *arg)
}
}
 
-   if (was_active) {
-   KERNEL_LOCK();
-   ixgbe_start(ifp);
-   KERNEL_UNLOCK();
-   }
-
/* Check for fan failure */
if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
(reg_eicr & IXGBE_EICR_GPI_SDP1)) {
@@ -1063,17 +1051,9 @@ ixgbe_encap(struct tx_ring *txr, struct 
cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
 #endif
 
-   /*
-* Force a cleanup if number of TX descriptors
-* available is below the threshold. If it fails
-* to get above, then abort transmit.
-*/
-   if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD) {
-   ixgbe_txeof(txr);
-   /* Make sure things have improved */
-   if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
-   return (ENOBUFS);
-   }
+   /* Check that we have least the minimal number of TX descriptors. */
+   if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
+   return (ENOBUFS);
 
/*
 * Important to capture the first descriptor
@@ -1137,8 +1117,6 @@ ixgbe_encap(struct tx_ring *txr, struct 
 
txd->read.cmd_type_len |=
htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
-   txr->tx_avail -= map->dm_nsegs;
-   txr->next_avail_desc = i;
 
txbuf->m_head = m_head;
/*
@@ -1156,6 +1134,11 @@ ixgbe_encap(struct tx_ring *txr, struct 
txbuf = >tx_buffers[first];
txbuf->eop_index = last;
 
+   membar_producer();
+
+   atomic_sub_int(>tx_avail, map->dm_nsegs);
+   txr->next_avail_desc = i;
+
++txr->tx_packets;
return (0);
 
@@ -1323,6 +1306,10 @@ ixgbe_stop(void *arg)
/* reprogram the RAR[0] in case user changed it. */
ixgbe_set_rar(>hw, 0, sc->hw.mac.addr, 0, IXGBE_RAH_AV);
 
+   intr_barrier(sc->tag);
+
+   KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
+
/* Should we really clear all structures on stop? */
ixgbe_free_transmit_structures(sc);
ixgbe_free_receive_structures(sc);
@@ -2203,11 +2190,13 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, 
tx_buffer->m_head = NULL;
tx_buffer->eop_index = -1;
 
+   membar_producer();
+
/* We've consumed the first desc, adjust counters */
if (++ctxd == sc->num_tx_desc)
ctxd = 0;

Re: Unlocking ix(4) a bit further

2015-09-30 Thread Hrvoje Popovski
On 30.9.2015. 14:30, Mark Kettenis wrote:
> Since people seemed to like my diff for em(4), here is one for ix(4).
> In addition to unlocking the rx completion path, this one also uses
> intr_barrier() and removes the rx mutex that claudio@ put in recently.
> 
> I don't have any ix(4) hardware.  So the only guarantee is that this
> compiles ;).


Hi,

in routed setup between two ix (82599)  i'm getting 1Mpps and system is
stable and responsive.
In bridge setup one minute or so after traffic is generated i'm getting
ifconfig OACTIVE flag on ix0 (receiver side) and traffic is stopped.
Then after 2 or 3 seconds this flag is cleared and traffic is bridged.

pf is disabled ...

i don't remember that i have seen this flag before in same setup... will
try same setup without this patch ...



root@:~
# ifconfig ix
ix0:
flags=8b47 mtu
1500
lladdr a0:36:9f:2e:96:a0
priority: 0
media: Ethernet autoselect (10GSFP+Cu full-duplex)
status: active
ix1:
flags=8b47 mtu
1500
lladdr a0:36:9f:2e:96:a1
priority: 0
media: Ethernet autoselect (10GSFP+Cu full-duplex)
status: active

root@:~
# ifconfig ix
ix0:
flags=8f47
mtu 1500
lladdr a0:36:9f:2e:96:a0
priority: 0
media: Ethernet autoselect (10GSFP+Cu full-duplex)
status: active
ix1:
flags=8b47 mtu
1500
lladdr a0:36:9f:2e:96:a1
priority: 0
media: Ethernet autoselect (10GSFP+Cu full-duplex)
status: active

root@:~
# ifconfig ix
ix0:
flags=8b47 mtu
1500
lladdr a0:36:9f:2e:96:a0
priority: 0
media: Ethernet autoselect (10GSFP+Cu full-duplex)
status: active
ix1:
flags=8b47 mtu
1500
lladdr a0:36:9f:2e:96:a1
priority: 0
media: Ethernet autoselect (10GSFP+Cu full-duplex)
status: active

root@:~
# ifconfig ix
ix0:
flags=8b47 mtu
1500
lladdr a0:36:9f:2e:96:a0
priority: 0
media: Ethernet autoselect (10GSFP+Cu full-duplex)
status: active
ix1:
flags=8b47 mtu
1500
lladdr a0:36:9f:2e:96:a1
priority: 0
media: Ethernet autoselect (10GSFP+Cu full-duplex)
status: active

root@:~
# ifconfig ix
ix0:
flags=8f47
mtu 1500
lladdr a0:36:9f:2e:96:a0
priority: 0
media: Ethernet autoselect (10GSFP+Cu full-duplex)
status: active
ix1:
flags=8b47 mtu
1500
lladdr a0:36:9f:2e:96:a1
priority: 0
media: Ethernet autoselect (10GSFP+Cu full-duplex)
status: active




dmesg

OpenBSD 5.8-current (GENERIC.MP) #0: Wed Sep 30 15:54:04 CEST 2015
r...@.srce.hr:/usr/src/sys/arch/amd64/compile/GENERIC.MP
RTC BIOS diagnostic error 80
real mem = 34314588160 (32724MB)
avail mem = 33270497280 (31729MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 2.7 @ 0x7e67c000 (84 entries)
bios0: vendor IBM version "-[D7E146CUS-1.82]-" date 04/09/2015
bios0: IBM IBM System x3550 M4 Server -[7914T91]-
acpi0 at bios0: rev 2
acpi0: sleep states S0 S5
acpi0: tables DSDT FACP TCPA ERST HEST HPET APIC MCFG OEM0 OEM1 SLIT
SRAT SLIC SSDT SSDT SSDT SSDT DMAR
acpi0: wakeup devices MRP1(S4) DCC0(S4) MRP3(S4) MRP5(S4) EHC2(S5)
PEX0(S5) PEX7(S5) EHC1(S5) IP2P(S3) MRPB(S4) MRPC(S4) MRPD(S4) MRPF(S4)
MRPG(S4) MRPH(S4) MRPI(S4) [...]
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpihpet0 at acpi0: 14318179 Hz
acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz, 2400.36 MHz
cpu0:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,DCA,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,LONG,LAHF,PERF,ITSC,FSGSBASE,SMEP,ERMS,SENSOR,ARAT
cpu0: 256KB 64b/line 8-way L2 cache
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
cpu0: apic clock running at 99MHz
cpu0: mwait min=64, max=64, C-substates=0.2.1.1, IBE
cpu1 at mainbus0: apid 2 (application processor)
cpu1: Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz, 2400.00 MHz
cpu1:

Re: Unlocking ix(4) a bit further

2015-09-30 Thread Hrvoje Popovski
On 30.9.2015. 19:20, Hrvoje Popovski wrote:
> On 30.9.2015. 14:30, Mark Kettenis wrote:
>> Since people seemed to like my diff for em(4), here is one for ix(4).
>> In addition to unlocking the rx completion path, this one also uses
>> intr_barrier() and removes the rx mutex that claudio@ put in recently.
>>
>> I don't have any ix(4) hardware.  So the only guarantee is that this
>> compiles ;).
> 
> 
> Hi,
> 
> in routed setup between two ix (82599)  i'm getting 1Mpps and system is
> stable and responsive.
> In bridge setup one minute or so after traffic is generated i'm getting
> ifconfig OACTIVE flag on ix0 (receiver side) and traffic is stopped.
> Then after 2 or 3 seconds this flag is cleared and traffic is bridged.
> 
> pf is disabled ...
> 
> i don't remember that i have seen this flag before in same setup... will
> try same setup without this patch ...

without this patch bridge setup works normally ...



Unlocking ix(4) a bit further

2015-09-30 Thread Mark Kettenis
Since people seemed to like my diff for em(4), here is one for ix(4).
In addition to unlocking the rx completion path, this one also uses
intr_barrier() and removes the rx mutex that claudio@ put in recently.

I don't have any ix(4) hardware.  So the only guarantee is that this
compiles ;).

Index: if_ix.c
===
RCS file: /home/cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.125
diff -u -p -r1.125 if_ix.c
--- if_ix.c 11 Sep 2015 12:09:10 -  1.125
+++ if_ix.c 30 Sep 2015 12:24:50 -
@@ -210,8 +210,6 @@ ixgbe_attach(struct device *parent, stru
sc->osdep.os_sc = sc;
sc->osdep.os_pa = *pa;
 
-   mtx_init(>rx_mtx, IPL_NET);
-
/* Set up the timer callout */
timeout_set(>timer, ixgbe_local_timer, sc);
timeout_set(>rx_refill, ixgbe_rxrefill, sc);
@@ -529,10 +527,7 @@ ixgbe_watchdog(struct ifnet * ifp)
 
/*
 * The timer is set to 5 every time ixgbe_start() queues a packet.
-* Then ixgbe_txeof() keeps resetting to 5 as long as it cleans at
-* least one descriptor.
-* Finally, anytime all descriptors are clean the timer is
-* set to 0.
+* Anytime all descriptors are clean the timer is set to 0.
 */
for (i = 0; i < sc->num_queues; i++, txr++) {
if (txr->watchdog_timer == 0 || --txr->watchdog_timer)
@@ -864,7 +859,7 @@ ixgbe_intr(void *arg)
struct tx_ring  *txr = sc->tx_rings;
struct ixgbe_hw *hw = >hw;
uint32_t reg_eicr;
-   int  i, refill = 0, was_active = 0;
+   int  i, refill = 0;
 
reg_eicr = IXGBE_READ_REG(>hw, IXGBE_EICR);
if (reg_eicr == 0) {
@@ -874,11 +869,8 @@ ixgbe_intr(void *arg)
 
if (ISSET(ifp->if_flags, IFF_RUNNING)) {
ixgbe_rxeof(que);
-   refill = 1;
-
-   if (ISSET(ifp->if_flags, IFF_OACTIVE))
-   was_active = 1;
ixgbe_txeof(txr);
+   refill = 1;
}
 
if (refill) {
@@ -915,12 +907,6 @@ ixgbe_intr(void *arg)
}
}
 
-   if (was_active) {
-   KERNEL_LOCK();
-   ixgbe_start(ifp);
-   KERNEL_UNLOCK();
-   }
-
/* Check for fan failure */
if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
(reg_eicr & IXGBE_EICR_GPI_SDP1)) {
@@ -1063,17 +1049,9 @@ ixgbe_encap(struct tx_ring *txr, struct 
cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
 #endif
 
-   /*
-* Force a cleanup if number of TX descriptors
-* available is below the threshold. If it fails
-* to get above, then abort transmit.
-*/
-   if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD) {
-   ixgbe_txeof(txr);
-   /* Make sure things have improved */
-   if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
-   return (ENOBUFS);
-   }
+   /* Check that we have least the minimal number of TX descriptors. */
+   if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD)
+   return (ENOBUFS);
 
/*
 * Important to capture the first descriptor
@@ -1137,7 +1115,7 @@ ixgbe_encap(struct tx_ring *txr, struct 
 
txd->read.cmd_type_len |=
htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
-   txr->tx_avail -= map->dm_nsegs;
+   atomic_sub_int(>tx_avail, map->dm_nsegs);
txr->next_avail_desc = i;
 
txbuf->m_head = m_head;
@@ -1323,6 +1301,10 @@ ixgbe_stop(void *arg)
/* reprogram the RAR[0] in case user changed it. */
ixgbe_set_rar(>hw, 0, sc->hw.mac.addr, 0, IXGBE_RAH_AV);
 
+   intr_barrier(sc->tag);
+
+   KASSERT((ifp->if_flags & IFF_RUNNING) == 0);
+
/* Should we really clear all structures on stop? */
ixgbe_free_transmit_structures(sc);
ixgbe_free_receive_structures(sc);
@@ -2207,7 +2189,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, 
if (++ctxd == sc->num_tx_desc)
ctxd = 0;
txr->next_avail_desc = ctxd;
-   --txr->tx_avail;
+   atomic_dec_int(>tx_avail);
 
return (0);
 }
@@ -2324,7 +2306,7 @@ ixgbe_tso_setup(struct tx_ring *txr, str
if (++ctxd == sc->num_tx_desc)
ctxd = 0;
 
-   txr->tx_avail--;
+   atomic_dec_int(>tx_avail);
txr->next_avail_desc = ctxd;
*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
@@ -2346,6 +2328,7 @@ ixgbe_txeof(struct tx_ring *txr)
struct ix_softc *sc = txr->sc;
struct ifnet*ifp = >arpcom.ac_if;
uint32_t first, last, done, processed;
+   uint32_t num_avail;
struct ixgbe_tx_buf *tx_buffer;
struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;
 
@@ -2357,23 +2340,17 @@ 

Re: Unlocking ix(4) a bit further

2015-09-30 Thread Alexey Suslikov
Mark Kettenis  xs4all.nl> writes:

> + * Thise parameter controls the minimum number of available transmit

"Thise" should be "This" here.