Add support for RX checksum offload. This is enabled by default and
may be disabled and re-enabled using ethtool:

 # ethtool -K eth0 rx off
 # ethtool -K eth0 rx on

The RAVB provides a simple checksumming scheme which appears to be
completely compatible with CHECKSUM_COMPLETE: a 1's complement sum of
all packet data after the L2 header is appended to packet data; this may
be trivially read by the driver and used to update the skb accordingly.

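In terms of performance, throughput is close to gigabit line-rate both with
and without RX checksum offload enabled. Perf output, however, appears to
indicate that significantly less time is spent in do_csum() when RX checksum
offload is enabled: do_csum() accounts for 6.95% of samples without offload
and does not appear in the summary with offload. This is as expected.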

Test results with RX checksum offload enabled:
 # /usr/bin/perf_3.16 record -o /run/perf.data -a netperf -t TCP_MAERTS -H 10.4.3.162
 MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.4.3.162 () port 0 AF_INET : demo
 enable_enobufs failed: getprotobyname
 Recv   Send    Send
 Socket Socket  Message  Elapsed
 Size   Size    Size     Time     Throughput
 bytes  bytes   bytes    secs.    10^6bits/sec

  87380  16384  16384    10.00     938.78
 [ perf record: Woken up 14 times to write data ]
 [ perf record: Captured and wrote 3.524 MB /run/perf.data (~153957 samples) ]

 Summary of output of perf report:
    19.49%      ksoftirqd/0  [kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
     9.88%      ksoftirqd/0  [kernel.kallsyms]  [k] __pi_memcpy
     7.33%      ksoftirqd/0  [kernel.kallsyms]  [k] skb_put
     7.00%      ksoftirqd/0  [kernel.kallsyms]  [k] ravb_poll
     3.89%      ksoftirqd/0  [kernel.kallsyms]  [k] dev_gro_receive
     3.65%          netperf  [kernel.kallsyms]  [k] __arch_copy_to_user
     3.43%          swapper  [kernel.kallsyms]  [k] arch_cpu_idle
     2.77%          swapper  [kernel.kallsyms]  [k] tick_nohz_idle_enter
     1.85%      ksoftirqd/0  [kernel.kallsyms]  [k] __netdev_alloc_skb
     1.80%          swapper  [kernel.kallsyms]  [k] _raw_spin_unlock_irq
     1.64%      ksoftirqd/0  [kernel.kallsyms]  [k] __slab_alloc.isra.79
     1.62%      ksoftirqd/0  [kernel.kallsyms]  [k] __pi___inval_cache_range

Test results without RX checksum offload enabled:
 # /usr/bin/perf_3.16 record -o /run/perf.data -a netperf -t TCP_MAERTS -H 10.4.3.162
 MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.4.3.162 () port 0 AF_INET : demo
 enable_enobufs failed: getprotobyname
 Recv   Send    Send
 Socket Socket  Message  Elapsed
 Size   Size    Size     Time     Throughput
 bytes  bytes   bytes    secs.    10^6bits/sec

  87380  16384  16384    10.00     941.09
 [ perf record: Woken up 14 times to write data ]
 [ perf record: Captured and wrote 3.411 MB /run/perf.data (~149040 samples) ]

 Summary of output of perf report:
   17.50%    ksoftirqd/0  [kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
    10.60%    ksoftirqd/0  [kernel.kallsyms]  [k] __pi_memcpy
     7.91%    ksoftirqd/0  [kernel.kallsyms]  [k] skb_put
     6.95%    ksoftirqd/0  [kernel.kallsyms]  [k] do_csum
     6.22%    ksoftirqd/0  [kernel.kallsyms]  [k] ravb_poll
     3.84%    ksoftirqd/0  [kernel.kallsyms]  [k] dev_gro_receive
     2.53%        netperf  [kernel.kallsyms]  [k] __arch_copy_to_user
     2.53%        swapper  [kernel.kallsyms]  [k] arch_cpu_idle
     2.27%        swapper  [kernel.kallsyms]  [k] tick_nohz_idle_enter
     1.90%    ksoftirqd/0  [kernel.kallsyms]  [k] __pi___inval_cache_range
     1.90%    ksoftirqd/0  [kernel.kallsyms]  [k] __netdev_alloc_skb
     1.52%    ksoftirqd/0  [kernel.kallsyms]  [k] __slab_alloc.isra.79

Above results collected on an R-Car Gen 3 Salvator-X/r8a7796 ES1.0.
Also tested on an R-Car Gen 3 Salvator-X/r8a7795 ES1.0.

By inspection, this also appears to be compatible with the ravb found
on R-Car Gen 2 SoCs; however, this patch is currently untested on such
hardware.

Signed-off-by: Simon Horman <horms+rene...@verge.net.au>
---
 drivers/net/ethernet/renesas/ravb_main.c | 58 +++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index fdf30bfa403b..7c6438cd7de7 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -403,8 +403,9 @@ static void ravb_emac_init(struct net_device *ndev)
        /* Receive frame limit set register */
        ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR);
 
-       /* PAUSE prohibition */
+       /* EMAC Mode: PAUSE prohibition; Duplex; RX Checksum; TX; RX */
        ravb_write(ndev, ECMR_ZPF | (priv->duplex ? ECMR_DM : 0) |
+                  (ndev->features & NETIF_F_RXCSUM ? ECMR_RCSC : 0) |
                   ECMR_TE | ECMR_RE, ECMR);
 
        ravb_set_rate(ndev);
@@ -520,6 +521,19 @@ static void ravb_get_tx_tstamp(struct net_device *ndev)
        }
 }
 
+static void ravb_rx_csum(struct sk_buff *skb)
+{
+       u8 *hw_csum;
+
+       /* The hardware checksum is 2 bytes appended to packet data */
+       if (unlikely(skb->len < 2))
+               return;
+       hw_csum = skb_tail_pointer(skb) - 2;
+       skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+       skb->ip_summed = CHECKSUM_COMPLETE;
+       skb_trim(skb, skb->len - 2);
+}
+
 /* Packet receive function for Ethernet AVB */
 static bool ravb_rx(struct net_device *ndev, int *quota, int q)
 {
@@ -587,8 +601,11 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
                                ts.tv_nsec = le32_to_cpu(desc->ts_n);
                                shhwtstamps->hwtstamp = timespec64_to_ktime(ts);
                        }
+
                        skb_put(skb, pkt_len);
                        skb->protocol = eth_type_trans(skb, ndev);
+                       if (ndev->features & NETIF_F_RXCSUM)
+                               ravb_rx_csum(skb);
                        napi_gro_receive(&priv->napi[q], skb);
                        stats->rx_packets++;
                        stats->rx_bytes += pkt_len;
@@ -1842,6 +1859,41 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
        return phy_mii_ioctl(phydev, req, cmd);
 }
 
+static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
+{
+       struct ravb_private *priv = netdev_priv(ndev);
+       unsigned long flags;
+
+       spin_lock_irqsave(&priv->lock, flags);
+
+       /* Disable TX and RX */
+       ravb_rcv_snd_disable(ndev);
+
+       /* Modify RX Checksum setting */
+       if (enable)
+               ravb_modify(ndev, ECMR, 0, ECMR_RCSC);
+       else
+               ravb_modify(ndev, ECMR, ECMR_RCSC, 0);
+
+       /* Enable TX and RX */
+       ravb_rcv_snd_enable(ndev);
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int ravb_set_features(struct net_device *ndev,
+                            netdev_features_t features)
+{
+       netdev_features_t changed = ndev->features ^ features;
+
+       if (changed & NETIF_F_RXCSUM)
+               ravb_set_rx_csum(ndev, features & NETIF_F_RXCSUM);
+
+       ndev->features = features;
+
+       return 0;
+}
+
 static const struct net_device_ops ravb_netdev_ops = {
        .ndo_open               = ravb_open,
        .ndo_stop               = ravb_close,
@@ -1853,6 +1905,7 @@ static const struct net_device_ops ravb_netdev_ops = {
        .ndo_do_ioctl           = ravb_do_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
+       .ndo_set_features       = ravb_set_features,
 };
 
 /* MDIO bus init function */
@@ -2004,6 +2057,9 @@ static int ravb_probe(struct platform_device *pdev)
        if (!ndev)
                return -ENOMEM;
 
+       ndev->features |= NETIF_F_RXCSUM;
+       ndev->hw_features |= ndev->features;
+
        pm_runtime_enable(&pdev->dev);
        pm_runtime_get_sync(&pdev->dev);
 
-- 
2.1.4

Reply via email to