Re: [OpenWrt-Devel] [RFC] Broadcom code found.

2014-01-28 Thread José Vázquez
2014-01-11, José Vázquez Fernández ppvazquez...@gmail.com:
 While Daniel González and I were fighting with jffs2, we tested some code
 extracted from Netgear. Here is what we found.
 We only tested brcm_wait, broadcom checksum code and the modification in
 tlbex.c and nothing strange happened when we flashed it.
 Hope this could help for the Broadcom SoCs and maybe others.


 diff -urN b/include/asm-mips/checksum.h a/include/asm-mips/checksum.h
 --- b/include/asm-mips/checksum.h 2007-06-12 16:13:11.0 +0200
 +++ a/include/asm-mips/checksum.h 2010-05-31 03:43:32.0 +0200
 @@ -98,6 +98,64 @@
   *   By Jorge Cwik jo...@laser.satlink.net, adapted for linux by
   *   Arnt Gulbrandsen.
   */
 +
 +#if defined(CONFIG_MIPS_BRCM)
 +
 +/* Brcm version can handle unaligned data. Merged from brcm 2.6.8
 kernel.*/
 +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 +{
 + if (((__u32)iph & 0x3) == 0) {
 + unsigned int *word = (unsigned int *) iph;
 + unsigned int *stop = word + ihl;
 + unsigned int csum;
 + int carry;
 +
 + csum = word[0];
 + csum += word[1];
 + carry = (csum < word[1]);
 + csum += carry;
 +
 + csum += word[2];
 + carry = (csum < word[2]);
 + csum += carry;
 +
 + csum += word[3];
 + carry = (csum < word[3]);
 + csum += carry;
 +
 + word += 4;
 + do {
 + csum += *word;
 + carry = (csum < *word);
 + csum += carry;
 + word++;
 + } while (word != stop);
 +
 + return csum_fold(csum);
 + } else {
 + __u16 * buff = (__u16 *) iph;
 + __u32 sum=0;
 + __u16 i;
 +
 + // make 16 bit words out of every two adjacent 8 bit words in
 the packet
 + // and add them up
 + for (i=0;i<ihl*2;i++){
 + sum = sum + (__u32) buff[i];
 + }
 +
 + // take only 16 bits out of the 32 bit sum and add up the
 carries
 + while (sum>>16)
 +   sum = (sum & 0xFFFF)+(sum >> 16);
 +
 + // one's complement the result
 + sum = ~sum;
 +
 + return ((__sum16) sum);
 + }
 +}
 +
 +#else
 +
  static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
  {
   const unsigned int *word = iph;

I've tested the above code and the network throughput improved by between
0.5% and 1% while running rsync with cifs and a 500 GB usb hdd with a
BCM63281 based board.
___
openwrt-devel mailing list
openwrt-devel@lists.openwrt.org
https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel


[OpenWrt-Devel] [RFC] Broadcom code found.

2014-01-10 Thread José Vázquez Fernández
While Daniel González and I were fighting with jffs2, we tested some code
extracted from Netgear. Here is what we found.
We only tested brcm_wait, broadcom checksum code and the modification in
tlbex.c and nothing strange happened when we flashed it.
Hope this could help for the Broadcom SoCs and maybe others.


diff -urN b/include/asm-mips/checksum.h a/include/asm-mips/checksum.h
--- b/include/asm-mips/checksum.h   2007-06-12 16:13:11.0 +0200
+++ a/include/asm-mips/checksum.h   2010-05-31 03:43:32.0 +0200
@@ -98,6 +98,64 @@
  * By Jorge Cwik jo...@laser.satlink.net, adapted for linux by
  * Arnt Gulbrandsen.
  */
+
+#if defined(CONFIG_MIPS_BRCM)
+
+/* Brcm version can handle unaligned data. Merged from brcm 2.6.8
kernel.*/
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+   if (((__u32)iph & 0x3) == 0) {
+   unsigned int *word = (unsigned int *) iph;
+   unsigned int *stop = word + ihl;
+   unsigned int csum;
+   int carry;
+
+   csum = word[0];
+   csum += word[1];
+   carry = (csum < word[1]);
+   csum += carry;
+
+   csum += word[2];
+   carry = (csum < word[2]);
+   csum += carry;
+
+   csum += word[3];
+   carry = (csum < word[3]);
+   csum += carry;
+
+   word += 4;
+   do {
+   csum += *word;
+   carry = (csum < *word);
+   csum += carry;
+   word++;
+   } while (word != stop);
+
+   return csum_fold(csum);
+   } else {
+   __u16 * buff = (__u16 *) iph;
+   __u32 sum=0;
+   __u16 i;
+
+   // make 16 bit words out of every two adjacent 8 bit words in
the packet
+   // and add them up
+   for (i=0;i<ihl*2;i++){
+   sum = sum + (__u32) buff[i];
+   }
+
+   // take only 16 bits out of the 32 bit sum and add up the
carries
+   while (sum>>16)
+ sum = (sum & 0xFFFF)+(sum >> 16);
+
+   // one's complement the result
+   sum = ~sum;
+
+   return ((__sum16) sum);
+   }
+}
+
+#else
+
 static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 {
const unsigned int *word = iph;
@@ -129,6 +187,8 @@
return csum_fold(csum);
 }
 
+#endif
+
 static inline __wsum csum_tcpudp_nofold(__be32 saddr,
__be32 daddr, unsigned short len, unsigned short proto,
__wsum sum)

--

diff -urN b/drivers/mtd/mtd_blkdevs.c a/drivers/mtd/mtd_blkdevs.c
--- b/drivers/mtd/mtd_blkdevs.c 2007-06-12 16:13:11.0 +0200
+++ a/drivers/mtd/mtd_blkdevs.c 2010-05-31 03:52:56.0 +0200
@@ -21,6 +21,9 @@
 #include <linux/init.h>
 #include <linux/mutex.h>
 #include <asm/uaccess.h>
+#if defined(CONFIG_MIPS_BRCM)
+#include <linux/syscalls.h>
+#endif
 
 static LIST_HEAD(blktrans_majors);
 
@@ -80,13 +83,23 @@
struct mtd_blktrans_ops *tr = arg;
struct request_queue *rq = tr->blkcore_priv->rq;
 
+#if defined(CONFIG_MIPS_BRCM)
+#if defined (CONFIG_PREEMPT_SOFTIRQS)
+   /* mtdblockd needs to run at the same priority as ksoftirqd threads so
loading of applications from flash won't get blocked by network traffic.
+   One bad thing about blocking application loading is that voice
applications can be blocked by network traffic, despite that they have
higher
+   priority than network tasks. This would be a priority inversion
scenario if happens. */
+   struct sched_param param = { .sched_priority =
CONFIG_BRCM_SOFTIRQ_BASE_RT_PRIO };
+   sched_setscheduler(current, SCHED_RR, &param);
+#endif
+#endif
+
/* we might get involved when memory gets low, so use PF_MEMALLOC */
current->flags |= PF_MEMALLOC | PF_NOFREEZE;
 
daemonize("%sd", tr->name);
 
/* daemonize() doesn't do this for us since some kernel threads
-  actually want to deal with signals. We can't just call
+  actually want to deal with signals. We can't just call 
   exit_sighand() since that'll cause an oops when we finally
   do exit. */
spin_lock_irq(&current->sighand->siglock);

-

diff -urN b/include/linux/mmzone.h a/include/linux/mmzone.h
--- b/include/linux/mmzone.h2007-06-12 16:13:11.0 +0200
+++ a/include/linux/mmzone.h2010-05-31 03:45:11.0 +0200
@@ -306,7 +306,17 @@
  * go. A value of 12 for DEF_PRIORITY implies that we will scan
1/4096th of the
  * queues (queue_length >> 12) during an aging round.
  */
+
+#if defined(CONFIG_MIPS_BRCM)
+/* We normally have only 8M~32M of RAM while desktop systems