On 13/04/15 15:07, Sergei Shtylyov wrote:
[snip]

> +struct ravb_private {
> +     struct net_device *ndev;
> +     struct platform_device *pdev;
> +     void __iomem *addr;
> +     struct mdiobb_ctrl mdiobb;
> +     u32 num_rx_ring[NUM_RX_QUEUE];
> +     u32 num_tx_ring[NUM_TX_QUEUE];
> +     u32 desc_bat_size;
> +     dma_addr_t desc_bat_dma;
> +     struct ravb_desc *desc_bat;
> +     dma_addr_t rx_desc_dma[NUM_RX_QUEUE];
> +     dma_addr_t tx_desc_dma[NUM_TX_QUEUE];

As a future optimization, you could try to group these members by
direction (RX vs. TX) so that you get better cache locality.
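
Something like this, as a sketch (reusing only the members quoted above):

        /* RX-path members kept together... */
        u32 num_rx_ring[NUM_RX_QUEUE];
        dma_addr_t rx_desc_dma[NUM_RX_QUEUE];
        /* ...and TX-path members kept together */
        u32 num_tx_ring[NUM_TX_QUEUE];
        dma_addr_t tx_desc_dma[NUM_TX_QUEUE];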

[snip]

> +static void ravb_set_duplex(struct net_device *ndev)
> +{
> +     struct ravb_private *priv = netdev_priv(ndev);
> +
> +     if (priv->duplex)       /* Full */
> +             ravb_write(ndev, ravb_read(ndev, ECMR) | ECMR_DM, ECMR);
> +     else                    /* Half */
> +             ravb_write(ndev, ravb_read(ndev, ECMR) & ~ECMR_DM, ECMR);

        u32 reg;

        reg = ravb_read(ndev, ECMR);
        if (priv->duplex)
                reg |= ECMR_DM;
        else
                reg &= ~ECMR_DM;
        ravb_write(ndev, reg, ECMR);
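
Or even a small read/modify/write helper (hypothetical name) that could
be reused for all the similar sequences in this driver:

        static void ravb_modify(struct net_device *ndev, u16 reg, u32 clear,
                                u32 set)
        {
                ravb_write(ndev, (ravb_read(ndev, reg) & ~clear) | set, reg);
        }

        ravb_modify(ndev, ECMR, ECMR_DM, priv->duplex ? ECMR_DM : 0);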

> +}
> +
> +static void ravb_set_rate(struct net_device *ndev)
> +{
> +     struct ravb_private *priv = netdev_priv(ndev);
> +
> +     switch (priv->speed) {
> +     case 100:               /* 100BASE */
> +             ravb_write(ndev, GECMR_SPEED_100, GECMR);
> +             break;
> +     case 1000:              /* 1000BASE */
> +             ravb_write(ndev, GECMR_SPEED_1000, GECMR);
> +             break;
> +     default:
> +             break;
> +     }

That still won't quite work at 10 Mbit/s, will it? Or is this
controller 100/1000 only (which would be extremely surprising)?

[snip]

> +             if (desc_status & (MSC_CRC | MSC_RFE | MSC_RTSF | MSC_RTLF |
> +                                MSC_CEEF)) {
> +                     stats->rx_errors++;
> +                     if (desc_status & MSC_CRC)
> +                             stats->rx_crc_errors++;
> +                     if (desc_status & MSC_RFE)
> +                             stats->rx_frame_errors++;
> +                     if (desc_status & (MSC_RTLF | MSC_RTSF))
> +                             stats->rx_length_errors++;
> +                     if (desc_status & MSC_CEEF)
> +                             stats->rx_missed_errors++;

The flow after the else condition, while refilling, might deserve some
explanation.

> +             } else {
> +                     u32 get_ts = priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE;
> +
> +                     skb = priv->rx_skb[q][entry];

Based on the refill logic below, it seems to me that you could leave
holes in your ring where rx_skb[q][entry] is NULL; shouldn't that be
checked here?
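
i.e. something like (just a sketch):

        skb = priv->rx_skb[q][entry];
        if (!skb)
                break;  /* hole left by a failed refill */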

> +                     priv->rx_skb[q][entry] = NULL;
> +                     dma_sync_single_for_cpu(&ndev->dev, desc->dptr,
> +                                             ALIGN(priv->rx_buffer_size, 16),
> +                                             DMA_FROM_DEVICE);
> +                     get_ts &= (q == RAVB_NC) ?
> +                                     RAVB_RXTSTAMP_TYPE_V2_L2_EVENT :
> +                                     ~RAVB_RXTSTAMP_TYPE_V2_L2_EVENT;
> +                     if (get_ts) {
> +                             struct skb_shared_hwtstamps *shhwtstamps;
> +
> +                             shhwtstamps = skb_hwtstamps(skb);
> +                             memset(shhwtstamps, 0, sizeof(*shhwtstamps));
> +                             ts.tv_sec = ((u64)desc->ts_sh << 32) |
> +                                         desc->ts_sl;
> +                             ts.tv_nsec = (u64)desc->ts_n;
> +                             shhwtstamps->hwtstamp = timespec64_to_ktime(ts);
> +                     }
> +                     skb_put(skb, pkt_len);
> +                     skb->protocol = eth_type_trans(skb, ndev);
> +                     if (q == RAVB_NC)
> +                             netif_rx(skb);
> +                     else
> +                             netif_receive_skb(skb);

Can't you always invoke netif_receive_skb() here? Why is there a special
queue?

> +                     stats->rx_packets++;
> +                     stats->rx_bytes += pkt_len;
> +             }
> +
> +             entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
> +             desc = &priv->rx_ring[q][entry];
> +     }
> +
> +     /* Refill the RX ring buffers. */
> +     for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
> +             entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
> +             desc = &priv->rx_ring[q][entry];
> +             /* The size of the buffer should be on 16-byte boundary. */
> +             desc->ds = ALIGN(priv->rx_buffer_size, 16);
> +
> +             if (!priv->rx_skb[q][entry]) {
> +                     skb = netdev_alloc_skb(ndev, skb_size);
> +                     if (!skb)
> +                             break;  /* Better luck next round. */

Should this really be a break or a continue?

[snip]

> +/* function for waiting dma process finished */
> +static void ravb_wait_stop_dma(struct net_device *ndev)
> +{

Shouldn't you stop the MAC TX here as well, for consistency?

> +     /* Wait for stopping the hardware TX process */
> +     ravb_wait(ndev, TCCR, TCCR_TSRQ0 | TCCR_TSRQ1 | TCCR_TSRQ2 | TCCR_TSRQ3,
> +               0);
> +
> +     ravb_wait(ndev, CSR, CSR_TPO0 | CSR_TPO1 | CSR_TPO2 | CSR_TPO3, 0);
> +
> +     /* Stop the E-MAC's RX processes. */
> +     ravb_write(ndev, ravb_read(ndev, ECMR) & ~ECMR_RE, ECMR);

[snip]

> +             /* Transmited network control queue */
> +             if (tis & TIS_FTF1) {
> +                     ravb_tx_free(ndev, RAVB_NC);
> +                     netif_wake_queue(ndev);

This would be better moved to the NAPI handler.
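
That is, only acknowledge/mask in hard IRQ context and let the NAPI poll
do the freeing and waking. A sketch, assuming a TIC interrupt-enable
register with a matching TIC_FTE1 bit (not shown in this excerpt):

        if (tis & TIS_FTF1) {
                /* Mask the TX interrupt; the NAPI poll will re-enable it */
                ravb_write(ndev, ravb_read(ndev, TIC) & ~TIC_FTE1, TIC);
                napi_schedule(&priv->napi);
                result = IRQ_HANDLED;
        }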

> +                     result = IRQ_HANDLED;
> +             }

[snip]

> +     if (ecmd->duplex == DUPLEX_FULL)
> +             priv->duplex = 1;
> +     else
> +             priv->duplex = 0;

Why not use what priv->phydev->duplex has cached for you?
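
i.e. something like:

        priv->duplex = (priv->phydev->duplex == DUPLEX_FULL);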

> +
> +     ravb_set_duplex(ndev);
> +
> +error_exit:
> +     mdelay(1);
> +
> +     /* Enable TX and RX */
> +     ravb_rcv_snd_enable(ndev);
> +
> +     spin_unlock_irqrestore(&priv->lock, flags);
> +
> +     return error;
> +}
> +
> +static int ravb_nway_reset(struct net_device *ndev)
> +{
> +     struct ravb_private *priv = netdev_priv(ndev);
> +     int error = -ENODEV;
> +     unsigned long flags;
> +
> +     if (priv->phydev) {

Is checking priv->phydev really necessary? It does not look like the
driver will work with, or even accept, an invalid PHY device anyway.

> +             spin_lock_irqsave(&priv->lock, flags);
> +             error = phy_start_aneg(priv->phydev);
> +             spin_unlock_irqrestore(&priv->lock, flags);
> +     }
> +
> +     return error;
> +}
> +
> +static u32 ravb_get_msglevel(struct net_device *ndev)
> +{
> +     struct ravb_private *priv = netdev_priv(ndev);
> +
> +     return priv->msg_enable;
> +}
> +
> +static void ravb_set_msglevel(struct net_device *ndev, u32 value)
> +{
> +     struct ravb_private *priv = netdev_priv(ndev);
> +
> +     priv->msg_enable = value;
> +}
> +
> +static const char ravb_gstrings_stats[][ETH_GSTRING_LEN] = {
> +     "rx_queue_0_current",
> +     "tx_queue_0_current",
> +     "rx_queue_0_dirty",
> +     "tx_queue_0_dirty",
> +     "rx_queue_0_packets",
> +     "tx_queue_0_packets",
> +     "rx_queue_0_bytes",
> +     "tx_queue_0_bytes",
> +     "rx_queue_0_mcast_packets",
> +     "rx_queue_0_errors",
> +     "rx_queue_0_crc_errors",
> +     "rx_queue_0_frame_errors",
> +     "rx_queue_0_length_errors",
> +     "rx_queue_0_missed_errors",
> +     "rx_queue_0_over_errors",
> +
> +     "rx_queue_1_current",
> +     "tx_queue_1_current",
> +     "rx_queue_1_dirty",
> +     "tx_queue_1_dirty",
> +     "rx_queue_1_packets",
> +     "tx_queue_1_packets",
> +     "rx_queue_1_bytes",
> +     "tx_queue_1_bytes",
> +     "rx_queue_1_mcast_packets",
> +     "rx_queue_1_errors",
> +     "rx_queue_1_crc_errors",
> +     "rx_queue_1_frame_errors_",
> +     "rx_queue_1_length_errors",
> +     "rx_queue_1_missed_errors",
> +     "rx_queue_1_over_errors",
> +};
> +
> +#define RAVB_STATS_LEN       ARRAY_SIZE(ravb_gstrings_stats)
> +
> +static int ravb_get_sset_count(struct net_device *netdev, int sset)
> +{
> +     switch (sset) {
> +     case ETH_SS_STATS:
> +             return RAVB_STATS_LEN;
> +     default:
> +             return -EOPNOTSUPP;
> +     }
> +}
> +
> +static void ravb_get_ethtool_stats(struct net_device *ndev,
> +                                struct ethtool_stats *stats, u64 *data)
> +{
> +     struct ravb_private *priv = netdev_priv(ndev);
> +     int i = 0;
> +     int q;
> +
> +     /* Device-specific stats */
> +     for (q = RAVB_BE; q < NUM_RX_QUEUE; q++) {
> +             struct net_device_stats *stats = &priv->stats[q];
> +
> +             data[i++] = priv->cur_rx[q];
> +             data[i++] = priv->cur_tx[q];
> +             data[i++] = priv->dirty_rx[q];
> +             data[i++] = priv->dirty_tx[q];
> +             data[i++] = stats->rx_packets;
> +             data[i++] = stats->tx_packets;
> +             data[i++] = stats->rx_bytes;
> +             data[i++] = stats->tx_bytes;
> +             data[i++] = stats->multicast;
> +             data[i++] = stats->rx_errors;
> +             data[i++] = stats->rx_crc_errors;
> +             data[i++] = stats->rx_frame_errors;
> +             data[i++] = stats->rx_length_errors;
> +             data[i++] = stats->rx_missed_errors;
> +             data[i++] = stats->rx_over_errors;
> +     }
> +}
> +
> +static void ravb_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
> +{
> +     switch (stringset) {
> +     case ETH_SS_STATS:
> +             memcpy(data, *ravb_gstrings_stats, sizeof(ravb_gstrings_stats));
> +             break;
> +     }
> +}
> +
> +static void ravb_get_ringparam(struct net_device *ndev,
> +                            struct ethtool_ringparam *ring)
> +{
> +     struct ravb_private *priv = netdev_priv(ndev);
> +
> +     ring->rx_max_pending = BE_RX_RING_MAX;
> +     ring->tx_max_pending = BE_TX_RING_MAX;
> +     ring->rx_pending = priv->num_rx_ring[RAVB_BE];
> +     ring->tx_pending = priv->num_tx_ring[RAVB_BE];
> +}
> +
> +static int ravb_set_ringparam(struct net_device *ndev,
> +                           struct ethtool_ringparam *ring)
> +{
> +     struct ravb_private *priv = netdev_priv(ndev);
> +     int error;
> +
> +     if (ring->tx_pending > BE_TX_RING_MAX ||
> +         ring->rx_pending > BE_RX_RING_MAX ||
> +         ring->tx_pending < BE_TX_RING_MIN ||
> +         ring->rx_pending < BE_RX_RING_MIN)
> +             return -EINVAL;
> +     if (ring->rx_mini_pending || ring->rx_jumbo_pending)
> +             return -EINVAL;
> +
> +     if (netif_running(ndev)) {
> +             netif_device_detach(ndev);
> +             netif_tx_disable(ndev);
> +             /* Wait for DMA stopping */
> +             ravb_wait_stop_dma(ndev);
> +
> +             /* Stop AVB-DMAC process */
> +             error = ravb_config(ndev);
> +             if (error < 0) {
> +                     netdev_err(ndev,
> +                                "cannot set ringparam! Any AVB processes are still running?\n");
> +                     return error;
> +             }
> +             synchronize_irq(ndev->irq);
> +
> +             /* Free all the skbuffs in the RX queue. */
> +             ravb_ring_free(ndev, RAVB_BE);
> +             ravb_ring_free(ndev, RAVB_NC);
> +             /* Free DMA buffer */
> +             ravb_free_dma_buffer(priv);
> +     }
> +
> +     /* Set new parameters */
> +     priv->num_rx_ring[RAVB_BE] = ring->rx_pending;
> +     priv->num_tx_ring[RAVB_BE] = ring->tx_pending;
> +     priv->num_rx_ring[RAVB_NC] = NC_RX_RING_SIZE;
> +     priv->num_tx_ring[RAVB_NC] = NC_TX_RING_SIZE;
> +
> +     if (netif_running(ndev)) {
> +             error = ravb_ring_init(ndev, RAVB_BE);
> +             if (error < 0) {
> +                     netdev_err(ndev, "%s: ravb_ring_init(RAVB_BE) failed\n",
> +                                __func__);
> +                     return error;
> +             }
> +
> +             error = ravb_ring_init(ndev, RAVB_NC);
> +             if (error < 0) {
> +                     netdev_err(ndev, "%s: ravb_ring_init(RAVB_NC) failed\n",
> +                                __func__);
> +                     return error;
> +             }
> +
> +             error = ravb_dmac_init(ndev);
> +             if (error < 0) {
> +                     netdev_err(ndev, "%s: ravb_dmac_init() failed\n",
> +                                __func__);
> +                     return error;
> +             }
> +
> +             ravb_emac_init(ndev);
> +
> +             netif_device_attach(ndev);
> +     }
> +
> +     return 0;
> +}
> +
> +static int ravb_get_ts_info(struct net_device *ndev,
> +                         struct ethtool_ts_info *info)
> +{
> +     struct ravb_private *priv = netdev_priv(ndev);
> +
> +     info->so_timestamping =
> +             SOF_TIMESTAMPING_TX_SOFTWARE |
> +             SOF_TIMESTAMPING_RX_SOFTWARE |
> +             SOF_TIMESTAMPING_SOFTWARE |
> +             SOF_TIMESTAMPING_TX_HARDWARE |
> +             SOF_TIMESTAMPING_RX_HARDWARE |
> +             SOF_TIMESTAMPING_RAW_HARDWARE;
> +     info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON);
> +     info->rx_filters =
> +             (1 << HWTSTAMP_FILTER_NONE) |
> +             (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
> +             (1 << HWTSTAMP_FILTER_ALL);
> +     info->phc_index = ptp_clock_index(priv->ptp.clock);
> +
> +     return 0;
> +}
> +
> +static const struct ethtool_ops ravb_ethtool_ops = {
> +     .get_settings           = ravb_get_settings,
> +     .set_settings           = ravb_set_settings,
> +     .nway_reset             = ravb_nway_reset,
> +     .get_msglevel           = ravb_get_msglevel,
> +     .set_msglevel           = ravb_set_msglevel,
> +     .get_link               = ethtool_op_get_link,
> +     .get_strings            = ravb_get_strings,
> +     .get_ethtool_stats      = ravb_get_ethtool_stats,
> +     .get_sset_count         = ravb_get_sset_count,
> +     .get_ringparam          = ravb_get_ringparam,
> +     .set_ringparam          = ravb_set_ringparam,
> +     .get_ts_info            = ravb_get_ts_info,
> +};
> +
> +/* Network device open function for Ethernet AVB */
> +static int ravb_open(struct net_device *ndev)
> +{
> +     struct ravb_private *priv = netdev_priv(ndev);
> +     int error;
> +
> +     napi_enable(&priv->napi);
> +
> +     error = request_irq(ndev->irq, ravb_interrupt, IRQF_SHARED, ndev->name,
> +                         ndev);
> +     if (error) {
> +             netdev_err(ndev, "cannot request IRQ\n");
> +             goto out_napi_off;
> +     }
> +
> +     /* Descriptor set */
> +     /* +26 gets the maximum ethernet encapsulation, +7 & ~7 because the
> +      * card needs room to do 8 byte alignment, +2 so we can reserve
> +      * the first 2 bytes, and +16 gets room for the status word from the
> +      * card.
> +      */
> +     priv->rx_buffer_size = (ndev->mtu <= 1492 ? PKT_BUF_SZ :
> +                             (((ndev->mtu + 26 + 7) & ~7) + 2 + 16));

Isn't that something that should be moved to a local ndo_change_mtu()
function? What happens if I change the MTU of a running interface?
Doesn't that completely break this RX buffer estimation?
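
A sketch of what I mean, simply refusing MTU changes while the interface
is running (as some other drivers do):

        static int ravb_change_mtu(struct net_device *ndev, int new_mtu)
        {
                if (netif_running(ndev))
                        return -EBUSY;

                ndev->mtu = new_mtu;
                return 0;
        }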

> +
> +     error = ravb_ring_init(ndev, RAVB_BE);
> +     if (error)
> +             goto out_free_irq;
> +     error = ravb_ring_init(ndev, RAVB_NC);
> +     if (error)
> +             goto out_free_irq;
> +
> +     /* Device init */
> +     error = ravb_dmac_init(ndev);
> +     if (error)
> +             goto out_free_irq;
> +     ravb_emac_init(ndev);
> +
> +     netif_start_queue(ndev);
> +
> +     /* PHY control start */
> +     error = ravb_phy_start(ndev);
> +     if (error)
> +             goto out_free_irq;
> +
> +     return 0;
> +
> +out_free_irq:
> +     free_irq(ndev->irq, ndev);
> +out_napi_off:
> +     napi_disable(&priv->napi);
> +     return error;
> +}
> +
> +/* Timeout function for Ethernet AVB */
> +static void ravb_tx_timeout(struct net_device *ndev)
> +{
> +     struct ravb_private *priv = netdev_priv(ndev);
> +     int i, q;
> +
> +     netif_stop_queue(ndev);
> +
> +     netif_err(priv, tx_err, ndev,
> +               "transmit timed out, status %8.8x, resetting...\n",
> +               ravb_read(ndev, ISS));
> +
> +     /* tx_errors count up */
> +     ndev->stats.tx_errors++;
> +
> +     /* Free all the skbuffs */
> +     for (q = RAVB_BE; q < NUM_RX_QUEUE; q++) {
> +             for (i = 0; i < priv->num_rx_ring[q]; i++) {
> +                     dev_kfree_skb(priv->rx_skb[q][i]);
> +                     priv->rx_skb[q][i] = NULL;
> +             }
> +     }
> +     for (q = RAVB_BE; q < NUM_TX_QUEUE; q++) {
> +             for (i = 0; i < priv->num_tx_ring[q]; i++) {
> +                     dev_kfree_skb(priv->tx_skb[q][i]);
> +                     priv->tx_skb[q][i] = NULL;
> +                     kfree(priv->tx_buffers[q][i]);
> +                     priv->tx_buffers[q][i] = NULL;
> +             }
> +     }
> +
> +     /* Device init */
> +     ravb_dmac_init(ndev);
> +     ravb_emac_init(ndev);
> +     netif_start_queue(ndev);
> +}
> +
> +/* Packet transmit function for Ethernet AVB */
> +static int ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> +{
> +     struct ravb_private *priv = netdev_priv(ndev);
> +     struct ravb_tstamp_skb *ts_skb = NULL;
> +     struct ravb_tx_desc *desc;
> +     unsigned long flags;
> +     void *buffer;
> +     u32 entry;
> +     u32 tccr;
> +     int q;
> +
> +     /* If skb needs TX timestamp, it is handled in network control queue */
> +     q = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) ? RAVB_NC : RAVB_BE;
> +
> +     spin_lock_irqsave(&priv->lock, flags);
> +     if (priv->cur_tx[q] - priv->dirty_tx[q] >= priv->num_tx_ring[q] - 4) {

What's so special about 4 here? You don't seem to be using 4
descriptors.
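
If this is meant as a stop/wake margin, a named constant (hypothetical
name) would make the intent clearer:

        #define RAVB_TX_STOP_MARGIN     4

        if (priv->cur_tx[q] - priv->dirty_tx[q] >=
            priv->num_tx_ring[q] - RAVB_TX_STOP_MARGIN) {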

> +             if (!ravb_tx_free(ndev, q)) {
> +                     netif_warn(priv, tx_queued, ndev, "TX FD exhausted.\n");
> +                     netif_stop_queue(ndev);
> +                     spin_unlock_irqrestore(&priv->lock, flags);
> +                     return NETDEV_TX_BUSY;
> +             }
> +     }
> +     entry = priv->cur_tx[q] % priv->num_tx_ring[q];
> +     priv->cur_tx[q]++;
> +     spin_unlock_irqrestore(&priv->lock, flags);
> +
> +     if (skb_put_padto(skb, ETH_ZLEN))
> +             return NETDEV_TX_OK;
> +
> +     priv->tx_skb[q][entry] = skb;
> +     buffer = PTR_ALIGN(priv->tx_buffers[q][entry], RAVB_ALIGN);
> +     memcpy(buffer, skb->data, skb->len);

A ~1500-byte memcpy() for every packet, not good...
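
Can't you DMA-map skb->data directly instead, provided the hardware
alignment constraint allows it? A sketch:

        desc->dptr = dma_map_single(&ndev->dev, skb->data, skb->len,
                                    DMA_TO_DEVICE);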

> +     desc = &priv->tx_ring[q][entry];

Since we have released the spinlock a few lines above, is there
something protecting ravb_tx_free() from running concurrently with this
xmit() call and trashing this entry?

> +     desc->ds = skb->len;
> +     desc->dptr = dma_map_single(&ndev->dev, buffer, skb->len,
> +                                 DMA_TO_DEVICE);
> +     if (dma_mapping_error(&ndev->dev, desc->dptr)) {
> +             dev_kfree_skb_any(skb);
> +             priv->tx_skb[q][entry] = NULL;

Don't you need to make sure this NULL is properly seen by ravb_tx_free()?
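
At the very least the rollback should probably happen under priv->lock,
e.g. (a sketch):

        spin_lock_irqsave(&priv->lock, flags);
        priv->tx_skb[q][entry] = NULL;
        spin_unlock_irqrestore(&priv->lock, flags);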

> +             return NETDEV_TX_OK;
> +     }
> +
> +     /* TX timestamp required */
> +     if (q == RAVB_NC) {
> +             ts_skb = kmalloc(sizeof(*ts_skb), GFP_ATOMIC);
> +             if (!ts_skb)
> +                     return -ENOMEM;
> +             ts_skb->skb = skb;
> +             ts_skb->tag = priv->ts_skb_tag++;
> +             priv->ts_skb_tag %= 0x400;
> +             list_add_tail(&ts_skb->list, &priv->ts_skb_list);
> +
> +             /* TAG and timestamp required flag */
> +             skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
> +             skb_tx_timestamp(skb);
> +             desc->tsr = 1;
> +             desc->tag = ts_skb->tag;
> +     }
> +
> +     /* Descriptor type must be set after all the above writes */
> +     dma_wmb();
> +     desc->dt = DT_FSINGLE;
> +
> +     spin_lock_irqsave(&priv->lock, flags);
> +     tccr = ravb_read(ndev, TCCR);
> +     if (!(tccr & (TCCR_TSRQ0 << q)))
> +             ravb_write(ndev, tccr | (TCCR_TSRQ0 << q), TCCR);
> +     spin_unlock_irqrestore(&priv->lock, flags);
> +
> +     return NETDEV_TX_OK;

-- 
Florian