On Fri, Feb 07, 2025 at 11:43:42AM +0100, Michal Swiatkowski wrote:
> From: Slawomir Mrozowicz <[email protected]>
> 
> Add Tx Hang detection due to an unhandled MDD Event.
> 
> Previously, a malicious VF could disable the entire port causing
> TX to hang on the E610 card.
> Those events that caused PF to freeze were not detected
> as an MDD event and usually required a Tx Hang watchdog timer
> to catch the suspension, and perform a physical function reset.
> 
> Implement flows in the affected PF driver in such a way to check
> the cause of the hang, detect it as an MDD event and log an
> entry of the malicious VF that caused the Hang.
> 
> The PF blocks the malicious VF, if it continues to be the source
> of several MDD events.
> 
> Reviewed-by: Przemek Kitszel <[email protected]>
> Reviewed-by: Marcin Szycik <[email protected]>
> Signed-off-by: Slawomir Mrozowicz <[email protected]>
> Co-developed-by: Michal Swiatkowski <[email protected]>
> Signed-off-by: Michal Swiatkowski <[email protected]>

...

> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h 
> b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
> index aa3b498558bc..e07b56625595 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
> @@ -1044,6 +1044,7 @@ struct ixgbe_nvm_version {
>  #define IXGBE_GCR_EXT_VT_MODE_16        0x00000001
>  #define IXGBE_GCR_EXT_VT_MODE_32        0x00000002
>  #define IXGBE_GCR_EXT_VT_MODE_64        0x00000003
> +#define IXGBE_GCR_EXT_VT_MODE_MASK   0x00000003

nit: For consistency I think spaces should be used to indent 0x00000003

>  #define IXGBE_GCR_EXT_SRIOV             (IXGBE_GCR_EXT_MSIX_EN | \
>                                        IXGBE_GCR_EXT_VT_MODE_64)
>  

...

> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
> b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

...

> +static u32 ixgbe_poll_tx_icache(struct ixgbe_hw *hw, u16 queue, u16 idx)
> +{
> +     IXGBE_WRITE_REG(hw, IXGBE_TXDESCIC, queue * idx);
> +     return IXGBE_READ_REG(hw, IXGBE_TXDESCIC);
> +}
> +
> +/**
> + * ixgbe_check_illegal_queue - search for queue with illegal packet
> + * @adapter: structure containing ring specific data
> + * @queue: queue index
> + *
> + * Check if tx descriptor connected with input queue
> + * contains illegal packet.
> + *
> + * Returns: true if queue contain illegal packet.
> + */
> +static bool ixgbe_check_illegal_queue(struct ixgbe_adapter *adapter,
> +                                   u16 queue)
> +{
> +     u32 hdr_len_reg, mss_len_reg, type_reg;
> +     struct ixgbe_hw *hw = &adapter->hw;
> +     u32 mss_len, header_len, reg;
> +
> +     for (u16 i = 0; i < IXGBE_MAX_TX_DESCRIPTORS; i++) {
> +             /* HW will clear bit IXGBE_TXDESCIC_READY when address
> +              * is written to address field. HW will set this bit
> +              * when iCache read is done, and data is ready at TIC_DWx.
> +              * Set descriptor address.
> +              */
> +             read_poll_timeout(ixgbe_poll_tx_icache, reg,
> +                               !(reg & IXGBE_TXDESCIC_READY), 0, 0, false,
> +                               hw, queue, i);
> +
> +             /* read tx descriptor access registers */
> +             hdr_len_reg = IXGBE_READ_REG(hw, 
> IXGBE_TIC_DW2(IXGBE_VLAN_MACIP_LENS_REG));
> +             type_reg = IXGBE_READ_REG(hw, 
> IXGBE_TIC_DW2(IXGBE_TYPE_TUCMD_MLHL));
> +             mss_len_reg = IXGBE_READ_REG(hw, 
> IXGBE_TIC_DW2(IXGBE_MSS_L4LEN_IDX));
> +
> +             /* check if Advanced Context Descriptor */
> +             if (FIELD_GET(IXGBE_ADVTXD_DTYP_MASK, type_reg) !=
> +                 IXGBE_ADVTXD_DTYP_CTXT)
> +                     continue;
> +
> +             /* check for illegal MSS and Header length */
> +             mss_len = FIELD_GET(IXGBE_ADVTXD_MSS_MASK, mss_len_reg);
> +             header_len = FIELD_GET(IXGBE_ADVTXD_HEADER_LEN_MASK,
> +                                    hdr_len_reg);
> +             if ((mss_len + header_len) > SZ_16K) {
> +                     e_warn(probe,
> +                            "mss len + header len too long\n");

nit: The above two lines can be a single line.

> +                     return true;
> +             }
> +     }
> +
> +     return false;
> +}
> +
> +/**
> + * ixgbe_handle_mdd_event - handle mdd event
> + * @adapter: structure containing ring specific data
> + * @tx_ring: tx descriptor ring to handle
> + *
> + * Reset VF driver if malicious vf detected or
> + * illegal packet in an any queue detected.
> + */
> +static void ixgbe_handle_mdd_event(struct ixgbe_adapter *adapter,
> +                                struct ixgbe_ring *tx_ring)
> +{
> +     u16 vf, q;
> +
> +     if (adapter->vfinfo && ixgbe_check_mdd_event(adapter)) {
> +             /* vf mdd info and malicious vf detected */
> +             if (!ixgbe_get_vf_idx(adapter, tx_ring->queue_index, &vf))
> +                     ixgbe_vf_handle_tx_hang(adapter, vf);
> +     } else {
> +             /* malicious vf not detected */
> +             for (q = 0; q < IXGBE_MAX_TX_QUEUES; q++) {
> +                     if (ixgbe_check_illegal_queue(adapter, q) &&
> +                         !ixgbe_get_vf_idx(adapter, q, &vf))
> +                             /* illegal queue detected */
> +                             ixgbe_vf_handle_tx_hang(adapter, vf);

It looks like ixgbe_vf_handle_tx_hang() will run for each illegal queue.
Could that be more than once for a given vf? If so, is that desirable?

> +             }
> +     }
> +}
> +
>  /**
>   * ixgbe_clean_tx_irq - Reclaim resources after transmit completes
>   * @q_vector: structure containing interrupt and ring information

...

Reply via email to