Vladimir Cotfas wrote:
> Patch for
> 1. The outstanding issue where a RTNET TX busy condition is not signaled
> correctly to the Linux stack so some packets are dropped on the floor. Can
> be reproduced by FTP'in a 10M file *from* the RTNET box: ACK packets sent by
> Linux/RTNET are dropped and some times the transfer stalls. The xmit
> function was changed to return a proper value to the Linux stack.
> 2. Bumping the number of rtskbs from 16 to 256 has a positive effect on FTP
> transfers.
> 
> The code is instrumented for rtskb shortages via stats and a /proc entry.
> 
> The net_device structure is no longer a static global [which was incorrect].
> It is dynamically allocated.

We are making progress. Still, I identify at least 5 reasons for
separate patches:

 - fix for TX hang
 - fix for net_device structure management
 - increased pool size - or switch to queues (see below)
 - statistics (see comment below)
 - remaining refactorings / cleanups without or with minor functional
   impact

> 
> As I am not trusting much the MIME capabilities of gmail I have also
> attached a copy of the patch.

You were right, gmail line-wrapped your patches. If your corporate mail
server does not accept plain smtp sessions or mangles the mails, just
use a private account.

> 
> -Vlad
> 
> Signed-off-by: Vladimir Cotfas <vl...@mantatest.com>
> 
> diff --git a/addons/rtnetproxy.c b/addons/rtnetproxy.c
> index 2358d09..c131215 100644
> --- a/addons/rtnetproxy.c
> +++ b/addons/rtnetproxy.c
> @@ -23,6 +23,10 @@
>   *
>   * Changelog:
>   *
> + * 14-Jan-2010  Vladimir Cotfas - Modified xmit function to return TX_BUSY
> to Linux.
> + *                                Instrumented code to track rtskb
> shortages.
> + *                                Tuned parameters for better performance
> w/ FTP.
> + *

That's what belongs today into the individual changelogs of patches (ie.
your local git commits). The above would qualify as subject, a few more
lines why things were changed are required. Browse the existing commits
(where you will surely also find bad examples of mine).

>   * 08-Nov-2002  Mathias Koehrer - Clear separation between rtai context and
>   *                                standard linux driver context.
>   *                                Data exchange via ringbuffers.
> @@ -51,6 +55,7 @@
>  #include <linux/if_ether.h> /* For the statistics structure. */
>  #include <linux/if_arp.h>   /* For ARPHRD_ETHER */
> 
> +#include <linux/proc_fs.h>      // for the proc filesystem
> 
>  #include <rtdev.h>
>  #include <rtskb.h>
> @@ -70,11 +75,24 @@ MODULE_PARM_DESC(proxy_rtskbs, "Number of realtime
> socket buffers in proxy pool"
> 
>  static struct rtskb_queue rtskb_pool;
> 
> +static struct Stats {
> +    volatile long rtskb_pool_empty;
> +    volatile long rtskb_ringbuffer;
> +    volatile long rtskb_ringbuffer_full;
> +    volatile long rx_dropped;
> +    volatile long rx_out;
> +    volatile long tx_in;
> +    volatile long tx_full;
> +    volatile long tx_pending;
> +} stats;

What of the above stats cannot be mapped on the netdev stats of
rtnetproxy's Linux device? If only minor things, I'm in strong favor of
going that way.

> +
> +static struct proc_dir_entry* procFileEntry;
> 
>  /*
> **************************************************************************
>   *  SKB ringbuffers for exchanging data between rtnet and kernel:
>   * ************************************************************************
> */
> -#define SKB_RINGBUFFER_SIZE 16
> +//#define SKB_RINGBUFFER_SIZE 16
> +#define SKB_RINGBUFFER_SIZE 256

I do not recall why rtnetproxy originally chose ring buffers. I think
it's simply overdesigned and could easily be replaced by rtskb_queues:

On reception, you already have an rtskb that fits into some
RTnet-to-Linux queue. On xmit, allocate an rtskb first and push the data
into it, then queue that rtskb into Linux-to-RTnet. So only proxy_rtskbs
remains as dynamically(!) tunable. Isn't that better?

>  typedef struct
>  {
>      int wr;        // Offset to wr: Increment before write!
> @@ -113,6 +131,57 @@ MODULE_PARM_DESC(rtdev_attach, "Attach to the specified
> RTnet device");
>  struct rtnet_device *rtnetproxy_rtdev;
>  #endif
> 
> +struct rtnetproxy_priv {
> +    struct net_device* dev;
> +};
> +
> +#define STATIC_BUF_LEN 40960
> +
> +static int procfile_read(char* buffer_UNUSED, char** start,
> +                         off_t offset, int count,
> +                         int* eof, void* data)
> +{
> +  static int len = 0;
> +  static char s_buf[STATIC_BUF_LEN] = {0};
> +  const int buf_cnt = STATIC_BUF_LEN - 1;
> +  char* buf = s_buf;
> +
> +  if(offset > 0) {
> +    *start = s_buf + offset;
> +    *eof = (len >= offset) ? 0 : 1;
> +    return eof ? (len - offset) : 0;
> +  }
> +
> +  len = 0;
> +
> +  len += snprintf(buf + len, buf_cnt - len - 1, "Stats:\n" \
> +                    "\trtskb_pool_empty: %ld\n" \
> +                    "\trtskb_ringbuffer: %ld\n" \
> +                    "\trtskb_ringbuffer_full: %ld\n" \
> +                    "\trx_dropped: %ld\n" \
> +                    "\trx_out: %ld\n" \
> +                    "\ttx_in: %ld\n" \
> +                    "\ttx_full: %ld\n" \
> +                    "\ttx_pending: %ld\n",
> +
> +                    stats.rtskb_pool_empty,
> +                    stats.rtskb_ringbuffer,
> +                    stats.rtskb_ringbuffer_full,
> +                    stats.rx_dropped,
> +                    stats.rx_out,
> +                    stats.tx_in,
> +                    stats.tx_full,
> +                    stats.tx_pending);
> +
> +#ifdef CONFIG_RTNET_CHECKED
> +  len += snprintf(buf + len, buf_cnt - len - 1, "rtskb pool_balance: %d\n",
> rtskb_pool.pool_balance);
> +#endif
> +
> +  *start = s_buf + offset;
> +  *eof = (len >= offset) ? 0 : 1;
> +  return eof ? (len - offset) : 0;
> +}
> +
>  /* ***********************************************************************
>   * Returns the next pointer from the ringbuffer or zero if nothing is
>   * available
> @@ -166,7 +235,7 @@ static void *write_to_ringbuffer(skb_exch_ringbuffer_t
> *pRing, void *p)
>  /* ************************************************************************
>   * net_device specific stuff:
>   * ************************************************************************
> */
> -static struct net_device dev_rtnetproxy;
> +static struct net_device* dev_rtnetproxy;
> 
>  static int rtnetproxy_xmit(struct sk_buff *skb, struct net_device *dev);
> 
> @@ -180,14 +249,18 @@ static int rtnetproxy_xmit(struct sk_buff *skb, struct
> net_device *dev);
> 
>  /* ************************************************************************
>   * This functions runs in rtai context.
> + *
> + * Linux->RTnet
> + *
> + * TX'ed rtskb's are to be freed by the low-level driver.
> + *
>   * It is called from rtnetproxy_transmit_thread whenever there is frame to
>   * sent out. Copies the standard linux sk_buff buffer to a rtnet buffer and
>   * sends it out using rtnet functions.
>   * ************************************************************************
> */
>  static inline void send_data_out(struct sk_buff *skb)
>  {
> -
> -    struct rtskb        *rtskb;
> +    struct rtskb* rtskb = NULL;
>  #ifndef CONFIG_RTNET_ADDON_PROXY_ARP
>      struct dest_route   rt;
>      int rc;
> @@ -203,14 +276,14 @@ static inline void send_data_out(struct sk_buff *skb)
>                                    * It represents the ethernet frame on the
> line and
>                                    * thus no spaces are allowed! */
> 
> -    struct skb_data_format *pData;
> +    struct skb_data_format* pData = NULL;
> +
> +    if(skb == NULL) return;

skb == NULL would be a bug - I bet this is a debugging left-over.

> 
>      /* Copy the data from the standard sk_buff to the realtime sk_buff:
>       * Both have the same length. */
>      rtskb = alloc_rtskb(skb->len, &rtskb_pool);
> -    if (NULL == rtskb) {
> -        return;
> -    }
> +    if(rtskb == NULL) return;

Stray change.

[Both variants are unclean BTW:

if (rtskb == NULL) / if (!rtskb)
        return;

]

> 
>      memcpy(rtskb->data, skb->data, skb->len);
>      rtskb->len = skb->len;
> @@ -225,6 +298,8 @@ static inline void send_data_out(struct sk_buff *skb)
>      /* Call the actual transmit function */
>      rtdev_xmit_proxy(rtskb);
> 
> +    rtskb = NULL;
> +

Useless assignment if you properly keep the kfree_rtskb under the
!CONFIG_RTNET_ADDON_PROXY_ARP umbrella, but...

>      rtdev_dereference(rtnetproxy_rtdev);
> 
>  #else /* !CONFIG_RTNET_ADDON_PROXY_ARP */
> @@ -239,8 +314,7 @@ static inline void send_data_out(struct sk_buff *skb)
>          /* check if IP source address fits */
>          if (rtdev->local_ip != pData->ip_src) {
>              rtdev_dereference(rtdev);
> -            kfree_rtskb(rtskb);
> -            return;
> +            goto out;
>          }
> 
>          rtskb->rtdev = rtdev;
> @@ -255,20 +329,27 @@ static inline void send_data_out(struct sk_buff *skb)
> 
>          /* The rtskb is freed somewhere deep in the driver...
>           * No need to do it here. */
> +        rtskb = NULL;
> 
>          rtdev_dereference(rtdev);
>      }
>      else
>      {
>          /* Routing failed => Free rtskb here... */
> -        kfree_rtskb(rtskb);
> +        goto out;
>      }
> -
>  #endif /* CONFIG_RTNET_ADDON_PROXY_ARP */
> +
> +  out:
> +    if(rtskb != NULL)
> +        kfree_rtskb(rtskb);

...unless I oversee something, this is plain refactoring, no functional
change, right? Then better leave as is instead of adding a conditional
(if (rtskb != NULL) to the common path. Alternative:

if (error)
        goto error_out;
...
error_out:
        kfree_rtskb(rtskb);

>  }
> 
>  /* ************************************************************************
>   * This is a RTAI thread. It will be activated (resumed) by the
> + *
> + * Linux->RTnet
> + *
>   * functions "rtnetproxy_xmit" or "rtnetproxy_kernel_recv" (in linux
> context)
>   * whenever new frames have to be sent out or if the
>   * "used" rtskb ringbuffer is full.
> @@ -280,17 +361,19 @@ static void rtnetproxy_transmit_thread(void *arg)
>      while (1)
>      {
>          /* Free all "used" rtskbs in ringbuffer */
> -        while ((del=read_from_ringbuffer(&ring_rtskb_kernel_rtnet)) !=
> 0)    {
> +        while ((del=read_from_ringbuffer(&ring_rtskb_kernel_rtnet)) != 0) {
>              kfree_rtskb(del);
> -    }
> +        }
> 
>          /* Send out all frames in the ringbuffer that have not been sent
> yet */
>          while ((skb = read_from_ringbuffer(&ring_skb_kernel_rtnet)) != 0)
>          {
>              send_data_out(skb);
> +            stats.tx_pending--;
>              /* Place the "used" skb in the ringbuffer back to kernel */
>              write_to_ringbuffer(&ring_skb_rtnet_kernel, skb);
>          }
> +
>          /* Will be activated with next frame to send... */
>          rtdm_sem_down(&rtnetproxy_sem);
>      }
> @@ -305,8 +388,12 @@ static void rtnetproxy_transmit_thread(void *arg)
>   * ************************************************************************
> */
>  static int rtnetproxy_xmit(struct sk_buff *skb, struct net_device *dev)
>  {
> +    int ret = 0;
> +
> +    stats.tx_in++;
>      if (write_to_ringbuffer(&ring_skb_kernel_rtnet, skb))
>      {
> +        stats.tx_pending++;
>  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
>          dev->stats.tx_packets++;
>          dev->stats.tx_bytes+=skb->len;
> @@ -314,14 +401,17 @@ static int rtnetproxy_xmit(struct sk_buff *skb, struct
> net_device *dev)
>      }
>      else
>      {
> +        stats.tx_full++;
>          /* No space in the ringbuffer... */
>          printk("rtnetproxy_xmit - no space in queue\n");
> -        dev_kfree_skb(skb);  /* Free the standard skb. */
> +        /////dev_kfree_skb(skb);  /* Free the standard skb. */
> +        ret = NETDEV_TX_BUSY;

Is that the fix for the hanging TX? You see, it's very hard to tell
things apart when aplied as a single blob. Would you still be able to
answer this quickly in, say, a year from now?

>      }
> 
>      /* Signal rtnet that there are packets waiting to be processed...
>       * */
> -    rtdm_sem_up(&rtnetproxy_sem);
> +    if(ret != 0)
> +        rtdm_sem_up(&rtnetproxy_sem);

Just push sem_up into the success branch above. Every "if" costs cycles
(specifically if the CPU mispredicts it).

> 
>      /* Delete all "used" skbs that already have been processed... */
>      {
> @@ -331,7 +421,8 @@ static int rtnetproxy_xmit(struct sk_buff *skb, struct
> net_device *dev)
>              dev_kfree_skb(del_skb);  /* Free the standard skb. */
>          }
>      }
> -    return 0;
> +
> +    return ret;
>  }
> 
> 
> @@ -345,24 +436,29 @@ static int rtnetproxy_xmit(struct sk_buff *skb, struct
> net_device *dev)
>  /* ************************************************************************
>   * This function runs in real-time context.
>   *
> + * RTnet->Linux
> + *
>   * It is called from inside rtnet whenever a packet has been received that
>   * has to be processed by rtnetproxy.
>   * ************************************************************************
> */
>  static void rtnetproxy_recv(struct rtskb *rtskb)
>  {
>      /* Acquire rtskb (JK) */
> -    if (rtskb_acquire(rtskb, &rtskb_pool) != 0) {
> +    if (rtskb_acquire(rtskb, &rtskb_pool) != 0) { // steal rtskb from RTnet
> pool into our pool

Stray comment.

> +        stats.rtskb_pool_empty++;

That, e.g., could become an RX overrun on the Linux netdev, nicely
printed by ifconfig.

>          rtdm_printk("rtnetproxy_recv: No free rtskb in pool\n");
>          kfree_rtskb(rtskb);
>      }
>      /* Place the rtskb in the ringbuffer: */
> -    else if (write_to_ringbuffer(&ring_rtskb_rtnet_kernel, rtskb))
> +    else if (write_to_ringbuffer(&ring_rtskb_rtnet_kernel, rtskb)) //
> rtnetproxy_recv
>      {
> +        stats.rtskb_ringbuffer++;
>          /* Switch over to kernel context: */
>          rtdm_nrtsig_pend(&rtnetproxy_signal);
>      }
>      else
>      {
> +        stats.rtskb_ringbuffer_full++;
>          /* No space in ringbuffer => Free rtskb here... */
>          rtdm_printk("rtnetproxy_recv: No space in queue\n");
>          kfree_rtskb(rtskb);
> @@ -372,13 +468,16 @@ static void rtnetproxy_recv(struct rtskb *rtskb)
> 
>  /* ************************************************************************
>   * This function runs in kernel mode.
> + *
> + * RTnet->Linux
> + *
>   * It is activated from rtnetproxy_signal_handler whenever rtnet received a
>   * frame to be processed by rtnetproxy.
>   * ************************************************************************
> */
>  static inline void rtnetproxy_kernel_recv(struct rtskb *rtskb)
>  {
>      struct sk_buff *skb;
> -    struct net_device *dev = &dev_rtnetproxy;
> +    struct net_device *dev = dev_rtnetproxy;
> 
>      int header_len = rtskb->rtdev->hard_header_len;
>      int len        = rtskb->len + header_len;
> @@ -389,7 +488,6 @@ static inline void rtnetproxy_kernel_recv(struct rtskb
> *rtskb)
> 
>      memcpy(skb_put(skb, len), rtskb->data-header_len, len);
> 
> -
>      /* Set some relevant entries in the skb: */
>      skb->protocol=eth_type_trans(skb,dev);
>      skb->dev=dev;
> @@ -409,12 +507,16 @@ static inline void rtnetproxy_kernel_recv(struct rtskb
> *rtskb)
>      dev->stats.rx_packets++;
>  #endif
> 
> -    netif_rx(skb);  /* pass it to the received stuff */
> -
> +    if(netif_rx(skb) == NET_RX_SUCCESS)  /* pass it to the received stuff
> */
> +         stats.rx_out++;
> +    else stats.rx_dropped++;

if (condition)
        A;
else
        B;

>  }
> 
>  /* ************************************************************************
>   * This function runs in kernel mode.
> + *
> + * RTnet->Linux
> + *
>   * It is activated from rtnetproxy_recv whenever rtnet received a frame to
>   * be processed by rtnetproxy.
>   * ************************************************************************
> */
> @@ -422,8 +524,9 @@ static void rtnetproxy_signal_handler(rtdm_nrtsig_t
> nrtsig, void *arg)
>  {
>      struct rtskb *rtskb;
> 
> -    while ( (rtskb = read_from_ringbuffer(&ring_rtskb_rtnet_kernel)) != 0)
> +    while ( (rtskb = read_from_ringbuffer(&ring_rtskb_rtnet_kernel)) != 0)
> // rtnetproxy_signal_handler

Stray comment.

>      {
> +        stats.rtskb_ringbuffer--;
>          rtnetproxy_kernel_recv(rtskb);
>          /* Place "used" rtskb in backqueue... */
>          while (0 == write_to_ringbuffer(&ring_rtskb_kernel_rtnet, rtskb)) {
> @@ -462,7 +565,6 @@ static int __init rtnetproxy_init(struct net_device
> *dev)
>  {
>      /* Fill in device structure with ethernet-generic values. */
>      ether_setup(dev);
> -    dev->tx_queue_len = 0;

[Scratching head] Not needed anymore because...? I'm just concerned we
might miss some legacy scenario.

>  #ifdef CONFIG_RTNET_ADDON_PROXY_ARP
>      memcpy(dev->dev_addr, rtnetproxy_rtdev->dev_addr,
> sizeof(dev->dev_addr));
>  #else
> @@ -490,6 +592,15 @@ static int __init rtnetproxy_init_module(void)
>  {
>      int err;
> 
> +    procFileEntry = create_proc_read_entry(
> +                        "rtnetproxy_stats",
> +                        S_IFREG | S_IRUGO,
> +                        NULL,   /* base entry */
> +                        procfile_read,  /* read operation */

I've been told that proc_read/write is doomed to die soon. We still have
lots of it in RTnet, but we should not add more.

> +                        NULL    /* data */
> +                  );
> +    if(procFileEntry == NULL) return -ENOMEM;
> +
>  #ifdef CONFIG_RTNET_ADDON_PROXY_ARP
>      if ((rtnetproxy_rtdev = rtdev_get_by_name(rtdev_attach)) == NULL) {
>      printk("Couldn't attach to %s\n", rtdev_attach);
> @@ -505,29 +616,34 @@ static int __init rtnetproxy_init_module(void)
>          return -ENOMEM;
>      }
> 
> -#ifdef HAVE_NET_DEVICE_OPS
> -    dev_rtnetproxy.netdev_ops = &rtnetproxy_netdev_ops;
> -#else /* !HAVE_NET_DEVICE_OPS */
> -    dev_rtnetproxy.init = rtnetproxy_init;
> -    dev_rtnetproxy.hard_start_xmit = rtnetproxy_xmit;
> -    dev_rtnetproxy.set_multicast_list = set_multicast_list;
> -#ifdef CONFIG_NET_FASTROUTE
> -    dev->accept_fastpath = rtnetproxy_accept_fastpath;
> -#endif
> -#endif /* !HAVE_NET_DEVICE_OPS */
> +    /* alloc_etherdev ensures aligned and zeroed private structures */
> +    dev_rtnetproxy = alloc_etherdev(sizeof(struct rtnetproxy_priv));
> +    if (!dev_rtnetproxy) {
> +        printk (KERN_ERR "%s: ether device alloc failed, aborting\n",
> __FUNCTION__);
> +        return -ENOMEM;
> +    }
> 
> -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
> -    SET_MODULE_OWNER(&dev_rtnetproxy);
> +#ifndef HAVE_NET_DEVICE_OPS
> +# error Must be compiled on a kernel that  HAVE_NET_DEVICE_OPS
>  #endif
> 
> -    /* Define the name for this unit */
> -    err=dev_alloc_name(&dev_rtnetproxy,"rtproxy");
> +    dev_rtnetproxy->netdev_ops = &rtnetproxy_netdev_ops;

This won't built without HAVE_NET_DEVICE_OPS. Another heavy user of
rtnetproxy would become quite unhappy then.

> +
> +    {
> +        struct rtnetproxy_priv* priv = netdev_priv(dev_rtnetproxy);

Coding style: please push variable declarations at the beginning of the
function.

> +        if(priv) {
> +            priv->dev = dev_rtnetproxy;
> +        }
> +    }
> 
> +    /* Define the name for this unit */
> +    err=dev_alloc_name(dev_rtnetproxy, "rtproxy");
>      if(err<0) {
>          rtskb_pool_release(&rtskb_pool);
>          return err;
>      }
> -    err = register_netdev(&dev_rtnetproxy);
> +
> +    err = register_netdev(dev_rtnetproxy);
>      if (err<0) {
>          rtskb_pool_release(&rtskb_pool);
>          return err;
> @@ -551,7 +667,7 @@ static int __init rtnetproxy_init_module(void)
>      /* Register with RTnet */
>      rt_ip_fallback_handler = rtnetproxy_recv;
> 
> -    printk("rtnetproxy installed as \"%s\"\n", dev_rtnetproxy.name);
> +    printk("rtnetproxy installed as \"%s\"\n", dev_rtnetproxy->name);
> 
>      return 0;
>  }
> @@ -586,16 +702,20 @@ static void __exit rtnetproxy_cleanup_module(void)
>          {
>              kfree_rtskb(del); // Although this is kernel mode, freeing
> should work...
>          }
> -        while ((del=read_from_ringbuffer(&ring_rtskb_rtnet_kernel))!=0)
> +        while ((del=read_from_ringbuffer(&ring_rtskb_rtnet_kernel))!=0) //
> rtnetproxy_cleanup_module

Stray comment.

>          {
>              kfree_rtskb(del); // Although this is kernel mode, freeing
> should work...
>          }
>      }
> 
>      /* Unregister the net device: */
> -    unregister_netdev(&dev_rtnetproxy);
> +    unregister_netdev(dev_rtnetproxy);
> +
> +    free_netdev(dev_rtnetproxy);
> 
>      rtskb_pool_release(&rtskb_pool);
> +
> +    remove_proc_entry(procFileEntry->name, procFileEntry->subdir);
>  }
> 
>  module_init(rtnetproxy_init_module);


Attachment: signature.asc
Description: OpenPGP digital signature

------------------------------------------------------------------------------

_______________________________________________
RTnet-users mailing list
RTnet-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rtnet-users

Reply via email to