> -----Original Message----- > From: Maciej Fijalkowski <[email protected]> > Sent: Thursday, January 15, 2026 8:53 PM > To: Behera, Vivek (DI FA DSP ICC PRC1) <[email protected]> > Cc: [email protected]; [email protected]; > [email protected]; [email protected]; > [email protected]; [email protected]; intel-wired- > [email protected]; [email protected] > Subject: Re: AW: [PATCH iwl-net v5] igb: Fix trigger of incorrect irq in > igb_xsk_wakeup > > On Thu, Jan 15, 2026 at 11:05:37AM +0000, Behera, VIVEK wrote: > > Hi Maciej > > > > > -----Original Message----- > > > From: Maciej Fijalkowski <[email protected]> > > > Sent: Wednesday, January 14, 2026 7:21 PM > > > To: Behera, Vivek (DI FA DSP ICC PRC1) <[email protected]> > > > Cc: [email protected]; [email protected]; > > > [email protected]; [email protected]; > > > [email protected]; [email protected]; intel-wired- > > > [email protected]; [email protected] > > > Subject: Re: AW: [PATCH iwl-net v5] igb: Fix trigger of incorrect > > > irq in igb_xsk_wakeup > > > > > > On Wed, Jan 14, 2026 at 08:19:37AM +0000, Behera, VIVEK wrote: > > > > > > (...) > > > > > > > > > > > > > > > diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h > > > > > > b/drivers/net/ethernet/intel/igb/e1000_defines.h > > > > > > index fa028928482f..9357564a2d58 100644 > > > > > > --- a/drivers/net/ethernet/intel/igb/e1000_defines.h > > > > > > +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h > > > > > > @@ -443,6 +443,7 @@ > > > > > > #define E1000_ICS_LSC E1000_ICR_LSC /* Link Status > > > > > > Change > */ > > > > > > #define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. > > > threshold > > > > > */ > > > > > > #define E1000_ICS_DRSTA E1000_ICR_DRSTA /* Device Reset > > > Aserted */ > > > > > > +#define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc > > > written > > > > > back */ > > > > > > > > > > > > /* Extended Interrupt Cause Set */ > > > > > > /* E1000_EITR_CNT_IGNR is only for 82576 and newer */ diff > > > > > > --git a/drivers/net/ethernet/intel/igb/igb_xsk.c > > > > > > b/drivers/net/ethernet/intel/igb/igb_xsk.c > > > > > > index 30ce5fbb5b77..6e51b5b6f131 100644 > > > > > > --- a/drivers/net/ethernet/intel/igb/igb_xsk.c > > > > > > +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c > > > > > > @@ -529,6 +529,13 @@ int igb_xsk_wakeup(struct net_device > > > > > > *dev, > > > > > > u32 qid, u32 > > > > > flags) > > > > > > struct igb_adapter *adapter = netdev_priv(dev); > > > > > > struct e1000_hw *hw = &adapter->hw; > > > > > > struct igb_ring *ring; > > > > > > + struct igb_q_vector *q_vector; > > > > > > + struct napi_struct *rx_napi; > > > > > > + struct napi_struct *tx_napi; > > > > > > + bool trigger_irq_tx = false; > > > > > > + bool trigger_irq_rx = false; > > > > > > + u32 eics_tx = 0; > > > > > > + u32 eics_rx = 0; > > > > > > u32 eics = 0; > > > > > > > > > > > > if (test_bit(__IGB_DOWN, &adapter->state)) @@ -536,27 > > > > > > +543,65 @@ int igb_xsk_wakeup(struct net_device *dev, u32 qid, > > > > > > u32 flags) > > > > > > > > > > > > if (!igb_xdp_is_enabled(adapter)) > > > > > > return -EINVAL; > > > > > > - > > > > > > - if (qid >= adapter->num_tx_queues) > > > > > > + /* Check if queue_id is valid. Tx and Rx queue numbers are > > > > > > always > same */ > > > > > > + if (qid >= adapter->num_rx_queues) > > > > > > return -EINVAL; > > > > > > - > > > > > > - ring = adapter->tx_ring[qid]; > > > > > > - > > > > > > - if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags)) > > > > > > - return -ENETDOWN; > > > > > > - > > > > > > - if (!READ_ONCE(ring->xsk_pool)) > > > > > > + /* Check if flags are valid */ > > > > > > + if (!(flags & XDP_WAKEUP_RX) && !(flags & XDP_WAKEUP_TX)) > > > > > > return -EINVAL; > > > > > > - > > > > > > - if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) { > > > > > > - /* Cause software interrupt */ > > > > > > + if (flags & XDP_WAKEUP_RX) { > > > > > > + /* IRQ trigger preparation for Rx */ > > > > > > + ring = adapter->rx_ring[qid]; > > > > > > + if (!READ_ONCE(ring->xsk_pool)) > > > > > > + return -ENXIO; > > > > > > + q_vector = ring->q_vector; > > > > > > + rx_napi = &q_vector->napi; > > > > > > + /* Extend the BIT mask for eics */ > > > > > > + eics_rx = ring->q_vector->eims_value; > > > > > > + trigger_irq_rx = true; > > > > > > + } > > > > > > + if (flags & XDP_WAKEUP_TX) { > > > > > > + if (adapter->flags & IGB_FLAG_QUEUE_PAIRS) { > > > > > > + /* In queue-pair mode, rx_ring and tx_ring share the > > > > > > same > > > q_vector, > > > > > > + * so a single IRQ trigger will wake both RX and TX > processing > > > > > > + */ > > > > > > + } else { > > > > > > + /* IRQ trigger preparation for Tx */ > > > > > > + ring = adapter->tx_ring[qid]; > > > > > > + if (test_bit(IGB_RING_FLAG_TX_DISABLED, > &ring- > > > >flags)) > > > > > > + return -ENETDOWN; > > > > > > + > > > > > > + if (!READ_ONCE(ring->xsk_pool)) > > > > > > + return -ENXIO; > > > > > > + q_vector = ring->q_vector; > > > > > > + tx_napi = &q_vector->napi; > > > > > > + /* Extend the BIT mask for eics */ > > > > > > + eics_tx = ring->q_vector->eims_value; > > > > > > + trigger_irq_tx = true; > > > > > > + } > > > > > > + } > > > > > > + /* All error checks are finished. Check and update napi > > > > > > +states for rx and tx > > > */ > > > > > > + if (trigger_irq_rx) { > > > > > > + if (!napi_if_scheduled_mark_missed(rx_napi)) > > > > > > + eics |= eics_rx; > > > > > > + } > > > > > > + if (trigger_irq_tx) { > > > > > > + if (!napi_if_scheduled_mark_missed(tx_napi)) > > > > > > + eics |= eics_tx; > > > > > > + } > > > > > > + /* Now we trigger the required irqs for Rx and Tx */ > > > > > > + if ((trigger_irq_rx) || (trigger_irq_tx)) { > > > > > > if (adapter->flags & IGB_FLAG_HAS_MSIX) { > > > > > > - eics |= ring->q_vector->eims_value; > > > > > > wr32(E1000_EICS, eics); > > > > > > } else { > > > > > > - wr32(E1000_ICS, E1000_ICS_RXDMT0); > > > > > > + if ((trigger_irq_rx) && (trigger_irq_tx)) > > > > > > + wr32(E1000_ICS, > > > > > > + E1000_ICS_RXDMT0 | > E1000_ICS_TXDW); > > > > > > + else if (trigger_irq_rx) > > > > > > + wr32(E1000_ICS, E1000_ICS_RXDMT0); > > > > > > + else > > > > > > + wr32(E1000_ICS, E1000_ICS_TXDW); > > > > > > > > > > My understanding is something below would be sufficient. Bits > > > > > set on E1000_ICS are not handled in any way so we don't have to > > > > > distinguish between rx/tx, it's just the matter of irq trigger and > > > > > napi > schedule. > > > > > > > > > Hi see my comments below > > > > > -----------------8<----------------- > > > > > > > > > > diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c > > > > > b/drivers/net/ethernet/intel/igb/igb_xsk.c > > > > > index 30ce5fbb5b77..0aba7afd6a03 100644 > > > > > --- a/drivers/net/ethernet/intel/igb/igb_xsk.c > > > > > +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c > > > > > @@ -524,12 +524,26 @@ bool igb_xmit_zc(struct igb_ring *tx_ring, > > > > > struct xsk_buff_pool *xsk_pool) > > > > > return nb_pkts < budget; > > > > > } > > > > > > > > > > +static void igb_sw_irq(struct igb_q_vector *q_vector) { > > > > > + u32 eics = 0; > > > > > + > > > > > + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) { > > > > > + /* Cause software interrupt */ > > > > > + if (adapter->flags & IGB_FLAG_HAS_MSIX) { > > > > > + eics |= ring->q_vector->eims_value; > > > > > + wr32(E1000_EICS, eics); > > > > > + } else { > > > > > + wr32(E1000_ICS, E1000_ICS_RXDMT0); > > > > So here it is sufficient to rely on the E1000_ICS_RXDMT0 bit to > > > > trigger the > > > correct irq (Tx and Rx)? > > > > I remember I received a review comment from Intel point to > > > > E1000_ICS_TXDW > > > as being the correct bit of triggering TX for non MSIX case. > > > > I can't really evaluate this since I don't have a setup to test this. > > > > But okay > > > > > > I don't see in irq handlers that we do any specific handling for > > > txdw vs rxdmt0. It's rather a matter of getting an irq here. > > Yes amongst the interrupt cause checks implemented in the msi > > interrupt handler there is no interest in either E1000_ICR_RXDMT0 > E1000_ICR_TXDW as events. > > All that matters in this in this case is a softirq trigerring > > napi_schedule . So we can stick to E1000_ICS_RXDMT0 irrespective of > > the flag in the xsk wakeup function. Although I must mention that all > > other usages of E1000_ICS_RXDMT0 in the kernel are in combination with > rx_ring so from the code perspective this usage looks strange to someone > without > deeper knowledge of the system. > > > > > > > > + } > > > > > + } > > > > > +} > > > > > + > > > > > int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) { > > > > > struct igb_adapter *adapter = netdev_priv(dev); > > > > > struct e1000_hw *hw = &adapter->hw; > > > > > struct igb_ring *ring; > > > > > - u32 eics = 0; > > > > > > > > > > if (test_bit(__IGB_DOWN, &adapter->state)) > > > > > return -ENETDOWN; > > > > > @@ -548,14 +562,15 @@ int igb_xsk_wakeup(struct net_device *dev, > > > > > u32 qid, u32 > > > > > flags) > > > > > if (!READ_ONCE(ring->xsk_pool)) > > > > > return -EINVAL; > > > > > > > > > > - if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) { > > > > > - /* Cause software interrupt */ > > > > > - if (adapter->flags & IGB_FLAG_HAS_MSIX) { > > > > > - eics |= ring->q_vector->eims_value; > > > > > - wr32(E1000_EICS, eics); > > > > > - } else { > > > > > - wr32(E1000_ICS, E1000_ICS_RXDMT0); > > > > > - } > > > > > + if (flags & XDP_WAKEUP_TX) > > > > > + igb_sw_irq(ring->q_vector); > > > > > + > > > > > + if (flags & XDP_WAKEUP_RX) { > > > > > + ring = adapter->rx_ring[qid]; > > > > > + /* for !IGB_FLAG_QUEUE_PAIRS, this will be NOP as > NAPI has > > > > > + * been already marked with NAPIF_STATE_MISSED > > > > > + */ > > > > I think you meant for the case IGB_FLAG_QUEUE_PAIRS. Since when > > > > the queue pairs are not active the Tx AND Rx queues don't share > > > > the same qvector and consequently not the same NAPI > > > > > > yes, correct > > > > > > > > + igb_sw_irq(ring->q_vector); > > > > Okay so you would be triggering soft irq's in two steps if both TX > > > > and RX flags > > > are set. > > > > Honestly, I have tried to avoid doing this in my patch. Which is > > > > the reason why I wait to finish all the error checks, Napi updates > > > > before triggering the > > > required irq vectors by writing to eics with a single write. > > > > But okay the other approach also works > > > > > > > > > > > > > > > > > > } > > > > > > > > > > return 0; > > > > > > > > > > ----------------->8----------------- > > > > > > > > > > > } > > > > > > } > > > > > > - > > > > > > return 0; > > > > > > } > > > > > > -- > > > > > > 2.34.1 > > > > > > > > > > I think the strategy of triggering interrupts in one step after > > > > performing all the > > > necessary checks is what might make this approach look complex. > > > > IMHO the one step strategy is better and more intuitive. > > > > Unfortunately, there isn't a reference here to go by since none of > > > > the > > > xsk_wakeup hooks implemented in the kernel care about the flags > > > > I can submit a v6 of the patch based the one step approach with > > > > further > > > simplifications. v6 would also include review suggestions I received for > > > v5. > > > > Like this I can also submit the next version to the igc patch. It > > > > follows the same > > > logic as the igb > > > > I have our regression tests with RTC testbench and our Siemens > > > > Profinet RT > > > tester running with these patches with I210 and I226 > > > > > > > > Alternatively, you could submit patches following the igb and igc > > > > following the > > > two-step logic. > > > > > > How about we meet the half way and something below? that would > > > include your request of having a single write to E1000_ICS. > > > > > Yes it sounds reasonable. However I would like to make some > > observations > > > > > > diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c > > > b/drivers/net/ethernet/intel/igb/igb_xsk.c > > > index 30ce5fbb5b77..432b4c7c1850 100644 > > > --- a/drivers/net/ethernet/intel/igb/igb_xsk.c > > > +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c > > > @@ -524,6 +524,17 @@ bool igb_xmit_zc(struct igb_ring *tx_ring, > > > struct xsk_buff_pool *xsk_pool) > > > return nb_pkts < budget; > > > } > > > > > > +static u32 igb_sw_irq_prep(struct igb_q_vector *q_vector) { > > > + u32 eics = 0; > > > + > > > + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) > > > + eics = adapter->flags & IGB_FLAG_HAS_MSIX ? > > > + q_vector->eims_value : 1; > > > + > > > + return eics; > > > +} > > > + > > > int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) { > > > struct igb_adapter *adapter = netdev_priv(dev); @@ -548,14 +559,23 > > > @@ int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 > > > flags) > > > if (!READ_ONCE(ring->xsk_pool)) > > > return -EINVAL; > > If we want to implement proper usage of flags then I would move > > everything related to a ring's internal checks inside the if case of the > corresponding if case. > > Use the correct adapter ring (rx or tx) to perform the checks. > > Even though the functionality wise this code is correct why just > > randomly pick the tx ring right at beginning of the function and do > > two checks with it? And same question would pop up to the reviewers > > mind for the igc driver where rx_ring is used. For me correct usage is > > more important than saving some lines of code in the patch > > And for me code that will be easy to maintain is important. > > We could move IGB_RING_FLAG_TX_DISABLED to XDP_WAKEUP_TX branch > but rest is generic. AF_XDP works on pairs of queues so pool is loaded on both > tx and rx pointed by given queue id. IOW xsk_pool will be present on both rx > and > tx rings, queue id validation can stay as is as well as igb works on > 'combined' > channels. Yes. There was never any confusion here that the xsk_buff_pool created by the xdp/xsk subsystem is for both tx and rx and is coupled to the netdev+queue_id combination. Also to your point regarding igb working only with combined queues is something I have pointed out several times during the review of my patches and to make it more clear I had in fact added a comment to my patch. Despite the fact that usage of rx_ring in one function ( igb_xsk_pool_disable) and tx_ring here to get the xsk_pool is irritating I can agree can keep the existing xsk pool's validity check. > > FWIW I had some PoC in the past where I implemented in AF_XDP core support > for async sockets - where pool was loaded *only* on rx or tx side. Not sure > if TSN > workloads would benefit from that? I remember this or a similar topic we in siemens discussed with Intel a few years ago which Included new hooks in a netdev's xdp driver and extensions to libxdp . I don't recall any discussion regarding plans for upstreaming this In some TSN use-cases especially with extremely low jitter tolerances a async mode with tx only traffic is indeed essential. > > > > > > > - if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) { > > > - /* Cause software interrupt */ > > > - if (adapter->flags & IGB_FLAG_HAS_MSIX) { > > > - eics |= ring->q_vector->eims_value; > > > + if (flags & XDP_WAKEUP_TX) > > > + eics |= igb_sw_irq_prep(ring->q_vector); > > > + > > > + if (flags & XDP_WAKEUP_RX) { > > > + ring = adapter->rx_ring[qid]; > > > + /* for IGB_FLAG_QUEUE_PAIRS, this will be NOP as NAPI has > > > + * been already marked with NAPIF_STATE_MISSED > > > + */ > > > + eics |= igb_sw_irq_prep(ring->q_vector); > > > + } > > > + > > > + /* Cause software interrupt */ > > > + if (eics) { > > > + if (adapter->flags & IGB_FLAG_HAS_MSIX) > > > wr32(E1000_EICS, eics); > > > - } else { > > > + else > > > wr32(E1000_ICS, E1000_ICS_RXDMT0); > > > - } > > > } > > > > > > return 0; > > > > > > > > > > > Regards > > > > > > > > Vivek > > > > I would like to know your view about the next steps. Would you like me > > to include the changes we discussed in next version of the patch I > > submitted? > > Or would you like to submit the patch you have prepared? > > Hmm...up to you. You can take my suggestions and add some tag, such as > Suggested-by. Okay. I will prepare and send out the next version of the patch Thanks for your suggestions Vivek > > Thanks, > Maciej > > > > > Regards > > > > Vivek
Re: [Intel-wired-lan] [PATCH iwl-net v5] igb: Fix trigger of incorrect irq in igb_xsk_wakeup
Behera, VIVEK via Intel-wired-lan Fri, 16 Jan 2026 03:45:03 -0800
- Re: [Intel-wired-lan] [PATCH iwl-net v5]... Kwapulinski, Piotr
- Re: [Intel-wired-lan] [PATCH iwl-ne... Behera, VIVEK via Intel-wired-lan
- Re: [Intel-wired-lan] [PATCH iwl-net v5]... Behera, VIVEK via Intel-wired-lan
- Re: [Intel-wired-lan] [PATCH iwl-net v5]... Loktionov, Aleksandr
- Re: [Intel-wired-lan] [PATCH iwl-ne... Behera, VIVEK via Intel-wired-lan
- Re: [Intel-wired-lan] [PATCH iwl-net v5]... Maciej Fijalkowski
- Re: [Intel-wired-lan] [PATCH iwl-ne... Behera, VIVEK via Intel-wired-lan
- Re: [Intel-wired-lan] [PATCH iw... Maciej Fijalkowski
- Re: [Intel-wired-lan] [PATC... Behera, VIVEK via Intel-wired-lan
- Re: [Intel-wired-lan] [... Maciej Fijalkowski
- Re: [Intel-wired-l... Behera, VIVEK via Intel-wired-lan
