date:20071121

Re: RE: is it useful testing __LINK_STATE_RX_SCHED in dev_close()?

2007-11-21 Thread Herbert Xu

On Wed, Nov 21, 2007 at 03:09:52PM +0800, [EMAIL PROTECTED] wrote:
 
 __LINK_STATE_RX_SCHED still exists in kernel 2.6.23.8.

You'll find that it's gone in 2.6.24-rc3.  In any case, the code
was racy but it's too unlikely (and the fix too intrusive) to be
worth fixing in 2.6.23 at this stage.  

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: 2.6.24-rc3: find complains about /proc/net

2007-11-21 Thread Pavel Emelyanov

Eric W. Biederman wrote:
 Below is a preliminary patch.  It solves the directory issue but it doesn't
 play well with proc_mnt and proc_flush_task.  It works by simply caching the
 network namespace when we mount proc so we don't have to be fancy and dynamic.

Nice... Where should we apply this patch to?

 Something for the discussion anyway.
 
 I will start sorting out what makes sense tomorrow.
 
 Eric
 
 
From f359fde2469ba8be2123a465e788a83c7e6831e9 Mon Sep 17 00:00:00 2001
 From: Eric W. Biederman [EMAIL PROTECTED]
 Date: Tue, 20 Nov 2007 19:36:05 -0700
 Subject: [PATCH] proc: Fix /proc/net directory listings.
 
 Having proc dynamically display the contents of /proc/net is
 hard.  So make life simpler by capturing the network namespace
 when we mount proc and only displaying that network namespace.
 
 ---
  fs/proc/base.c  |8 ++--
  fs/proc/generic.c   |4 ++-
  fs/proc/internal.h  |   13 +++
  fs/proc/proc_net.c  |   89 
 ---
  fs/proc/root.c  |   50 ++
  include/linux/proc_fs.h |4 ++
  6 files changed, 66 insertions(+), 102 deletions(-)
 
 diff --git a/fs/proc/base.c b/fs/proc/base.c
 index aeaf0d0..9d4f06a 100644
 --- a/fs/proc/base.c
 +++ b/fs/proc/base.c
 @@ -2395,7 +2395,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, 
 struct dentry * dentry, struct
   if (tgid == ~0U)
   goto out;
  
 - ns = dentry-d_sb-s_fs_info;
 + ns = proc_sbi(dentry-d_sb)-pid_ns;
   rcu_read_lock();
   task = find_task_by_pid_ns(tgid, ns);
   if (task)
 @@ -2476,7 +2476,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, 
 filldir_t filldir)
   goto out;
   }
  
 - ns = filp-f_dentry-d_sb-s_fs_info;
 + ns = proc_sbi(filp-f_dentry-d_sb)-pid_ns;
   tgid = filp-f_pos - TGID_OFFSET;
   for (task = next_tgid(tgid, ns);
task;
 @@ -2615,7 +2615,7 @@ static struct dentry *proc_task_lookup(struct inode 
 *dir, struct dentry * dentry
   if (tid == ~0U)
   goto out;
  
 - ns = dentry-d_sb-s_fs_info;
 + ns = proc_sbi(dentry-d_sb)-pid_ns;
   rcu_read_lock();
   task = find_task_by_pid_ns(tid, ns);
   if (task)
 @@ -2758,7 +2758,7 @@ static int proc_task_readdir(struct file * filp, void * 
 dirent, filldir_t filldi
   /* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
*/
 - ns = filp-f_dentry-d_sb-s_fs_info;
 + ns = proc_sbi(filp-f_dentry-d_sb)-pid_ns;
   tid = (int)filp-f_version;
   filp-f_version = 0;
   for (task = first_tid(leader, tid, pos - 2, ns);
 diff --git a/fs/proc/generic.c b/fs/proc/generic.c
 index 1bdb624..b58f0ec 100644
 --- a/fs/proc/generic.c
 +++ b/fs/proc/generic.c
 @@ -398,7 +398,9 @@ struct dentry *proc_lookup(struct inode * dir, struct 
 dentry *dentry, struct nam
   continue;
   if (!memcmp(dentry-d_name.name, de-name, 
 de-namelen)) {
   unsigned int ino = de-low_ino;
 -
 + 
 + if (de-shadow_proc)
 + de = de-shadow_proc(dentry-d_sb, de);
   de_get(de);
   spin_unlock(proc_subdir_lock);
   error = -EINVAL;
 diff --git a/fs/proc/internal.h b/fs/proc/internal.h
 index 1820eb2..a26f115 100644
 --- a/fs/proc/internal.h
 +++ b/fs/proc/internal.h
 @@ -11,6 +11,18 @@
  
  #include linux/proc_fs.h
  
 +struct pid_namespace;
 +struct net;
 +struct proc_sb_info {
 + struct pid_namespace *pid_ns;
 + struct net   *net_ns;
 +};
 +
 +static inline struct proc_sb_info *proc_sbi(struct super_block *sb)
 +{
 + return sb-s_fs_info;
 +}
 +
  #ifdef CONFIG_PROC_SYSCTL
  extern int proc_sys_init(void);
  #else
 @@ -78,3 +90,4 @@ static inline int proc_fd(struct inode *inode)
  {
   return PROC_I(inode)-fd;
  }
 +
 diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
 index 131f9c6..8a82e29 100644
 --- a/fs/proc/proc_net.c
 +++ b/fs/proc/proc_net.c
 @@ -50,89 +50,15 @@ struct net *get_proc_net(const struct inode *inode)
  }
  EXPORT_SYMBOL_GPL(get_proc_net);
  
 -static struct proc_dir_entry *proc_net_shadow;
 +static struct proc_dir_entry *shadow_pde;
  
 -static struct dentry *proc_net_shadow_dentry(struct dentry *parent,
 - struct proc_dir_entry *de)
 +static struct proc_dir_entry *proc_net_shadow(struct super_block *sb,
 +   struct proc_dir_entry *de)
  {
 - struct dentry *shadow = NULL;
 - struct inode *inode;
 - if (!de)
 - goto out;
 - de_get(de);
 - inode = proc_get_inode(parent-d_inode-i_sb, de-low_ino, de);
 - if (!inode)
 - goto out_de_put;
 -

Re: HTB/HSFC shaping precision

2007-11-21 Thread Jarek Poplawski

On 20-11-2007 22:21, Denys Fedoryshchenko wrote:
...
 If traffic is dropped - it will be resent, a lot of energy will be wasted for 
 nothing. Same bytes will pass all long way around earth just because i am not 
 able to manage my QoS box :-)

Sure, but you'll probably use almost every bit you've paid for!

 
 Plus uplink bandwidth will be used for that, i am using my own protocol(it is 
 TCP accelerator for satellite communications based on NACK and streaming 
 compression, so each resend - it is few bytes more on uplink and additional 
 delay. Ah yes, even resend over TCP it is more delay, than if it will be 
 queued for few milliseconds on bottleneck. 
 
 Plus if buffer on STM-1 interface way too small - smallest spike will cause 
 packet loss, and the situation can be far away from congestion. As result it will 
 be very difficult to reach maximum bandwidth on such link. And linux box in 
 this situation is magic box, which can help to save energy, hungry people and 
 help to use resources efficiently :-)
 

I'm still not sure how this traffic goes around, because eg., if you
receive something through a satellite, then it would only make sense if
it were controlled earlier to the same speed too. Otherwise you should
have this dropping on your HTB (of course you could use big buffers,
but anyway...), instead of STM, but resending could be similar.

But, if you have full control on your side, it looks like a kind of
realtime traffic, and then HFSC should be more appropriate for this
(but I only 'heard' about this).

 Yes, for sure. Thats what i am reading almost each day, when i dont 
 understand something clearly. But, my english is far away from good, so 
 sometimes i just misunderstand something even in good manual.

Then good news: read the code! There is really as little English as
possible...

Cheers,
Jarek P.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module

2007-11-21 Thread Ryousei Takano

This patch includes the PSPacer (Precise Software Pacer) qdisc
module, which achieves precise transmission bandwidth control.
You can find more information at the project web page
(http://www.gridmpi.org/gridtcp.jsp).

Signed-off-by: Ryousei Takano [EMAIL PROTECTED]
---
 include/linux/pkt_sched.h |   38 ++
 net/sched/Kconfig |9 +
 net/sched/Makefile|1 +
 net/sched/sch_psp.c   |  959 +
 4 files changed, 1007 insertions(+), 0 deletions(-)
 create mode 100644 net/sched/sch_psp.c

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 919af93..d3f8afd 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -430,6 +430,44 @@ enum {
 
 #define TCA_ATM_MAX(__TCA_ATM_MAX - 1)
 
+/* Precise Software Pacer section */
+
+#define TC_PSP_MAXDEPTH (8)
+
+typedef long long gapclock_t;
+
+enum {
+   MODE_NORMAL = 0,
+   MODE_STATIC = 1,
+};
+
+struct tc_psp_copt
+{
+   __u32   level;
+   __u32   mode;
+   __u32   rate;
+};
+
+struct tc_psp_qopt
+{
+   __u32   defcls;
+   __u32   rate;
+   __u32   direct_pkts;
+};
+
+struct tc_psp_xstats
+{
+   __u32   bytes;  /* gap packet statistics */
+   __u32   packets;
+};
+
+enum
+{
+   TCA_PSP_UNSPEC,
+   TCA_PSP_COPT,
+   TCA_PSP_QOPT,
+};
+
 /* Network emulator */
 
 enum
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9c15c48..ec40e43 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -184,6 +184,15 @@ config NET_SCH_DSMARK
  To compile this code as a module, choose M here: the
  module will be called sch_dsmark.
 
+config NET_SCH_PSP
+   tristate Precise Software Pacer (PSP)
+   ---help---
+ Say Y here if you want to include PSPacer module, which means
+ that you will be able to control precise pacing.
+
+ To compile this driver as a module, choose M here: the
+ module will be called sch_psp.
+
 config NET_SCH_NETEM
tristate Network emulator (NETEM)
---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 81ecbe8..85425c2 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
 obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
 obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
 obj-$(CONFIG_NET_SCH_ATM)  += sch_atm.o
+obj-$(CONFIG_NET_SCH_PSP)  += sch_psp.o
 obj-$(CONFIG_NET_SCH_NETEM)+= sch_netem.o
 obj-$(CONFIG_NET_CLS_U32)  += cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)   += cls_route.o
diff --git a/net/sched/sch_psp.c b/net/sched/sch_psp.c
new file mode 100644
index 000..5c56742
--- /dev/null
+++ b/net/sched/sch_psp.c
@@ -0,0 +1,959 @@
+/*
+ * net/sched/sch_psp.c PSPacer: Precise Software Pacer
+ *
+ * Copyright (C) 2004-2007 National Institute of Advanced 
+ * Industrial Science and Technology (AIST), Japan.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors:Ryousei Takano, [EMAIL PROTECTED]
+ */
+
+#include linux/module.h
+#include linux/types.h
+#include linux/kernel.h
+#include linux/netdevice.h
+#include linux/skbuff.h
+#include linux/rtnetlink.h
+#include linux/ethtool.h
+#include linux/if_arp.h
+#include linux/in.h
+#include linux/ip.h
+#include net/pkt_sched.h
+
+/* PSPacer achieves precise rate regulation results, and no microscopic
+ * burst transmission which exceeds the limit is generated.
+ *
+ * The basic idea is that transmission timing can be precisely controlled,
+ * if packets are sent back-to-back at the wire rate.  PSPacer controls 
+ * the packet transmission intervals by inserting additional packets,
+ * called gap packets, between adjacent packets.  The transmission interval
+ * can be controlled accurately by adjusting the number and size of the gap
+ * packets. PSPacer uses the 802.3x PAUSE frame as the gap packet.
+ *
+ * For the purpose of adjusting the gap size, this Qdisc maintains a byte
+ * clock which is recorded by a total transmitted byte per connection.
+ * Each sub-class has a class local clock which is used to make decision
+ * whether to send a packet or not.  If there is not any packets to send,
+ * gap packets are inserted.
+ *
+ * References:
+ * [1] R.Takano, T.Kudoh, Y.Kodama, M.Matsuda, H.Tezuka, and Y.Ishikawa,
+ * Design and Evaluation of Precise Software Pacing Mechanisms for
+ * Fast Long-Distance Networks, PFLDnet2005.
+ * [2] http://www.gridmpi.org/gridtcp.jsp
+ */
+
+#define HW_GAP (16)/* Preamble(8) + Inter Frame Gap(8) */
+#define FCS(4) /* Frame Check Sequence(4) */
+#define MIN_GAP (64)   /* Minimum size of gap packet */
+#define MIN_TARGET_RATE (1000) /* 1 KB/s (= 8

[RFC][PATCH 2/3] TC: PSPacer qdisc module

2007-11-21 Thread Ryousei Takano

This patch includes the PSPacer (Precise Software Pacer) qdisc
tc part, which achieves precise transmission bandwidth control.
You can find more information at the project web page
(http://www.gridmpi.org/gridtcp.jsp).

Signed-off-by: Ryousei Takano [EMAIL PROTECTED]
---
 include/linux/pkt_sched.h |   38 +
 tc/Makefile   |1 +
 tc/q_psp.c|  200 +
 3 files changed, 239 insertions(+), 0 deletions(-)
 create mode 100644 tc/q_psp.c

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 268c515..c708082 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -430,6 +430,44 @@ enum {
 
 #define TCA_ATM_MAX(__TCA_ATM_MAX - 1)
 
+/* Precise Software Pacer section */
+
+#define TC_PSP_MAXDEPTH (8)
+
+typedef long long gapclock_t;
+
+enum {
+   MODE_NORMAL = 0,
+   MODE_STATIC = 1,
+};
+
+struct tc_psp_copt
+{
+   __u32   level;
+   __u32   mode;
+   __u32   rate;
+};
+
+struct tc_psp_qopt
+{
+   __u32   defcls;
+   __u32   rate;
+   __u32   direct_pkts;
+};
+
+struct tc_psp_xstats
+{
+   __u32   bytes;  /* gap packet statistics */
+   __u32   packets;
+};
+
+enum
+{
+   TCA_PSP_UNSPEC,
+   TCA_PSP_COPT,
+   TCA_PSP_QOPT,
+};
+
 /* Network emulator */
 
 enum
diff --git a/tc/Makefile b/tc/Makefile
index a715566..836df9d 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -12,6 +12,7 @@ TCMODULES += q_prio.o
 TCMODULES += q_tbf.o
 TCMODULES += q_cbq.o
 TCMODULES += q_rr.o
+TCMODULES += q_psp.o
 TCMODULES += q_netem.o
 TCMODULES += f_rsvp.o
 TCMODULES += f_u32.o
diff --git a/tc/q_psp.c b/tc/q_psp.c
new file mode 100644
index 000..e3f4cf7
--- /dev/null
+++ b/tc/q_psp.c
@@ -0,0 +1,200 @@
+/*
+ * q_psp.c PSPacer: Precise Software Pacer
+ *
+ * Copyright (C) 2004-2007 National Institute of Advanced 
+ * Industrial Science and Technology (AIST), Japan.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors:Ryousei Takano, [EMAIL PROTECTED]
+ *
+ */
+
+#include stdio.h
+#include stdlib.h
+#include unistd.h
+#include syslog.h
+#include fcntl.h
+#include sys/socket.h
+#include netinet/in.h
+#include arpa/inet.h
+#include string.h
+
+#include utils.h
+#include tc_util.h
+
+static void explain(void)
+{
+   fprintf(stderr, 
+Usage: ... qdisc add ... psp [ default N ] [rate RATE]\n
+ default  minor id of class to which unclassified packets are sent {0}\n
+ rate physical interface bandwidth\n\n
+... class add ... psp mode M [ rate MBPS ]\n
+ mode target rate estimation method (NORMAL=0 STATIC=1) {0}\n
+ rate rate allocated to this class\n);
+}
+
+static void explain1(char *arg)
+{
+   fprintf(stderr, Illegal \%s\\n, arg);
+   explain();
+}
+
+
+static int psp_parse_opt(struct qdisc_util *qu, int argc, char **argv,
+struct nlmsghdr *n)
+{
+   struct tc_psp_qopt qopt;
+   struct rtattr *tail;
+   memset(qopt, 0, sizeof(qopt));
+
+   while (argc  0) {
+   if (matches(*argv, rate) == 0) {
+   NEXT_ARG();
+   if (get_rate(qopt.rate, *argv)) {
+   explain1(rate);
+   return -1;
+   }
+   } else if (matches(*argv, default) == 0) {
+   NEXT_ARG();
+   if (get_u32(qopt.defcls, *argv, 16)) {
+   explain1(default);
+   return -1;
+   }
+   } else if (matches(*argv, help) == 0) {
+   explain();
+   return -1;
+   } else {
+   fprintf(stderr, What is \%s\?\n, *argv);
+   explain();
+   return -1;
+   }
+   argc--;
+   argv++;
+   }
+
+   tail = NLMSG_TAIL(n);
+   addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+   addattr_l(n, 2024, TCA_OPTIONS, qopt, NLMSG_ALIGN(sizeof(qopt)));
+   tail-rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
+   return 0;
+}
+
+static int psp_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+   struct rtattr *tb[TCA_PSP_QOPT+1];
+   struct tc_psp_copt *copt;
+   struct tc_psp_qopt *qopt;
+   SPRINT_BUF(b);
+
+   if (opt == NULL)
+   return 0;
+
+   memset(tb, 0, sizeof(tb));
+   parse_rtattr_nested(tb, TCA_PSP_QOPT, opt);
+
+   if (tb[TCA_PSP_COPT]) {
+   copt = RTA_DATA(tb[TCA_PSP_COPT]);
+   if (RTA_PAYLOAD(tb[TCA_PSP_COPT])  sizeof(*copt))
+

[RFC][PATCH 3/3] TC: PSPacer man page

2007-11-21 Thread Ryousei Takano

This patch includes the man page of the PSPacer (Precise Software
Pacer) qdisc module.

Signed-off-by: Ryousei Takano [EMAIL PROTECTED]
---
 man/man8/tc-psp.8 |  166 +
 1 files changed, 166 insertions(+), 0 deletions(-)
 create mode 100644 man/man8/tc-psp.8

diff --git a/man/man8/tc-psp.8 b/man/man8/tc-psp.8
new file mode 100644
index 000..a6e26bf
--- /dev/null
+++ b/man/man8/tc-psp.8
@@ -0,0 +1,166 @@
+.TH PSP 8 13 October 2007 iproute2 Linux
+.SH NAME
+PSP \- Precise Software Pacer
+.SH SYNOPSIS
+.B tc qdisc ... dev
+dev
+.B  ( parent
+classid 
+.B | root) [ handle 
+major: 
+.B ] psp [ default 
+minor-id
+.B ] [ rate
+rate
+.B ] 
+
+.B tc class ... dev
+dev
+.B parent 
+major:[minor]
+.B [ classid 
+major:minor
+.B ] psp rate
+rate
+.B ] [ mode 
+mode
+.B ] 
+
+.SH DESCRIPTION
+Precise Software Pacer (PSPacer) is a classful queuing discipline 
+which controls traffic with
+.BR tc (8)
+command.
+PSP achieves a precise pacing per class.
+
+.SH GAP PACKET
+The key to realizing precise pacing is to control the starting time of 
+the transmission of each packet.  We propose a simple yet accurate 
+mechanism to trigger the transmission of a packet.  That is, to insert 
+a gap packet between the real packets.  The gap packet produces a gap 
+between sequentially transmitted real packets.
+We employ a PAUSE packet as a gap packet.  A PAUSE packet is defined in 
+the IEEE 802.3x flow control.
+
+By changing the gap packet size, the starting time of 
+the next real packet transmission can be precisely controlled.
+For example, to control a half rate transmission, a gap packet is inserted 
+between every real packet where the gap packet size is the same as 
+that of the real packets.
+
+.SH BYTE CLOCK SCHEDULING
+Packet transmission is scheduled based on the inter-packet gap of each 
+class (i.e. target rate).
+If the network has multiple bottleneck links, it is necessary to 
+schedule the order of packet transmission and the packet interval.  
+
+PSPacer maintains a virtual clock which is counted by the total transmitted 
+byte instead of real time clock.  Each sub-class has its local clock 
+which is used to make decision whether to send a packet or not.
+If there is an idle time, a gap packet is inserted.
+
+.SH CLASSIFICATION
+Within one PSP instance, many classes may exist. Each of these classes
+contains its own qdisc.
+
+When enqueuing a packet, PSP starts at the root and uses various methods to 
+determine which class should be used to obtain the data to be enqueued. 
+
+In the standard configuration, this process is rather easy. 
+At each node we look for an instruction, and then go to the class the 
+instruction refers to. If the class found is a leaf-node (without 
+children), we enqueue the packet there. If it is not yet a leaf node, we do 
+the same thing over again starting from that node. 
+
+The following actions are performed in order at each node we visit, until we
+move to another node or the process terminates.
+.TP
+(i)
+Consult filters attached to the class. If we are at a leaf node, we are done. 
+Otherwise, restart.
+.TP
+(ii)
+If none of the above returned with an instruction, send to the default class.
+.P
+./ This algorithm makes sure that a packet always ends up somewhere, even while
+./ you are busy building your configuration. 
+
+.SH QDISC
+The root of a PSP qdisc class tree has the following parameters:
+
+.TP 
+parent major:minor | root
+This mandatory parameter determines the place of the PSP instance, 
+either at the
+.B root
+of an interface or within an existing class.
+.TP
+handle major:
+Like all other qdiscs, the PSP can be assigned a handle. It should consist only
+of a major number, followed by a colon. Optional, but it is very useful 
+if classes will be generated within this qdisc.
+.TP 
+default minor-id
+Unclassified traffic is sent to the class with this minor-id.
+.TP
+rate rate
+Optional.  You can explicitly specify the maximum transmission rate.
+For example, if a 33MHz/32bit PCI bus is used to connect a Gigabit 
+Ethernet network interface, the bottleneck is the PCI bus, and the 
+system can not transmit packets at the rate of gigabit/sec. 
+
+.SH CLASSES
+Classes have a host of parameters to configure their operation.
+
+.TP 
+parent major:minor
+Specifies the place of this class within the hierarchy. If attached directly 
+to a qdisc and not to another class, minor can be omitted. Mandatory.
+.TP 
+classid major:minor
+Like qdiscs, classes can be named. The major number must be equal to the
+major number of the qdisc to which it belongs. Optional, but needed if this 
+class is going to have children.
+.TP 
+rate rate
+Maximum transmission rate this class including all its children are assigned. 
+Optional, but required if this class is set to mode 1 (static target rate).
+.TP
+mode mode
+Range from 0 to 1.  The mode 0 is without pacing.  The mode 1 is
+pacing based on static target rate estimation.

Re: HTB/HSFC shaping precision

2007-11-21 Thread Denys Fedoryshchenko

On Wed, 21 Nov 2007 10:47:10 +0100, Jarek Poplawski wrote
 
 But, if you have full control on your side, it looks like a kind of
 realtime traffic, and then HFSC should be more appropriate for this
 (but I only 'heard' about this).

One message later, thats what i dreamed about :-)
Subject: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module 
On website they have very good explanation... 
http://www.gridmpi.org/gridtcp.jsp

--
Denys Fedoryshchenko
Technical Manager
Virtual ISP S.A.L.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module

2007-11-21 Thread Patrick McHardy


Ryousei Takano wrote:

This patch includes the PSPacer (Precise Software Pacer) qdisc
module, which achieves precise transmission bandwidth control.
You can find more information at the project web page
(http://www.gridmpi.org/gridtcp.jsp).



Looks good, but please run checkpatch over it. A few more comments
below.

+/* Precise Software Pacer section */
+
+#define TC_PSP_MAXDEPTH (8)
+
+typedef long long gapclock_t;
+
+enum {
+   MODE_NORMAL = 0,
+   MODE_STATIC = 1,
+};
+
+struct tc_psp_copt
+{
+   __u32   level;
+   __u32   mode;
+   __u32   rate;


What unit is rate measured in? Is 32 bit really enough?



+static struct sk_buff *alloc_gap_packet(struct Qdisc* sch, int size)
+{
+   struct sk_buff *skb;
+   struct net_device *dev = sch-dev;
+   unsigned char *pkt;
+   int pause_time = 0;
+   int pktsize = size + 2;
+
+   skb = alloc_skb(pktsize, GFP_ATOMIC);
+   if (!skb)
+   return NULL;
+
+   skb_reserve(skb, 2);
+
+   pkt = skb-data;
+   memset(pkt, 0xff, pktsize);
+   pkt[0] = 0x01; /* dst address: 01:80:c2:00:00:01 */
+   pkt[1] = 0x80;
+   pkt[2] = 0xc2;
+   pkt[3] = 0x00;
+   pkt[4] = 0x00;
+   pkt[5] = 0x01;
+   memcpy(pkt + 6, dev-dev_addr, ETH_ALEN /* dev-addr_len */);
+
+   pkt[12] = 0x88; /* MAC control:88 08 */
+   pkt[13] = 0x08;
+   pkt[14] = 0;/* MAC control opcode: 00 01 */
+   pkt[15] = 1;


A few #defines for all these magic values and a struct for
the header would make this nicer.


+   pkt[16] = pause_time  8;
+   pkt[17] = pause_time;
+
+   skb_put(skb, size);
+
+   skb-dev = sch-dev;
+   skb-protocol = ETH_P_802_3;
+   skb_reset_network_header(skb); /* It is refered at 
dev_queue_xmit_nit(). */
+
+   return skb;
+}
+



+static struct psp_class *psp_classify(struct sk_buff *skb, struct Qdisc *sch,
+ int *qres)
+{
+   struct psp_sched_data *q = qdisc_priv(sch);
+   struct psp_class *cl;
+   struct tcf_result res;
+   struct tcf_proto *tcf;
+   int result;
+
+   if ((cl = psp_find(skb-priority, sch)) != NULL  cl-level == 0)
+   return cl;
+   tcf = q-filter_list;


This should handle tc actions.


+   if (tcf  (result = tc_classify(skb, tcf, res)) = 0) {


It seems you can have a hierarchy of classes, so why aren't
you classifying recursively?


+   if ((cl = (struct psp_class *)res.class) == NULL) {
+   if ((cl = psp_find(res.classid, sch)) == NULL) {
+   /* filter selected invalid classid */
+   goto try_default;
+   }
+   }
+   if (is_leaf_class(cl))
+   return cl; /* hit leaf class */
+
+   /* apply inner filter chain */
+   tcf = cl-filter_list;
+   }
+
+ try_default:
+   /* classification failed, try default class */
+   cl = psp_find(TC_H_MAKE(TC_H_MAJ(sch-handle), q-defcls), sch);
+   if (cl == NULL || cl-level  0)
+   return PSP_DIRECT;


I'd prefer if you don't follow the HTB way of using a direct class
for unclassified packets, it makes noticing when classification is
incomplete harder and that's what the default class is for.


+   return cl;
+}
+
+static inline void psp_activate(struct psp_sched_data *q, struct psp_class *cl)
+{
+   cl-activity |= FLAG_ACTIVE;
+   list_add_tail(cl-dlist, q-drop_list);
+}
+
+static inline void psp_deactivate(struct psp_sched_data *q,
+ struct psp_class *cl)
+{
+   cl-activity = MASK_ACTIVE;


MASK_ACTIVE is misleading, it's MASK_INACTIVE. I'd suggest
to simply use &= ~FLAG_ACTIVE or cl->q.qlen != 0 (which
indicates an active class).


+   list_del_init(cl-dlist);
+}
+
+#define COUNT(x, y) (((x) + ((y) - 1)) / (y))


DIV_ROUND_UP


+static void add_leaf_class(struct psp_sched_data *q, struct psp_class *cl)
+{
+   struct psp_class *p;
+   int mtu = q-mtu + FCS;
+
+   /* chain normal/pacing class list */
+   switch (cl-mode) {
+   case MODE_NORMAL:
+   list_add_tail(cl-plist, q-normal_list);
+   break;
+   case MODE_STATIC:
+   cl-gapsize = (((q-max_rate / 1000) * mtu)
+  / (cl-rate / 1000)) - mtu;
+   cl-gapsize -= (HW_GAP + FCS) * COUNT(q-max_rate, cl-rate);
+   cl-gapsize = max_t(int, cl-gapsize, MIN_GAP);
+   cl-activity |= FLAG_DMARK;
+   list_for_each_entry(p, q-pacing_list, plist) {
+   if (cl-gapsize  p-gapsize)
+   break;
+   }
+   list_add_tail(cl-plist, p-plist);
+   break;
+   }
+}
+
+static int recalc_gapsize(struct sk_buff* skb, struct Qdisc *sch)
+{
+   struct psp_sched_data *q = qdisc_priv(sch);
+   int ret;
+

[PATCH] Add packet filtering based on process's security context.

2007-11-21 Thread Tetsuo Handa

Hello.

This patch comes from a thread at http://lkml.org/lkml/2007/11/16/155 .

I want to use IP/port based access control for incoming connections/datagrams.

This idea has been discussed several times, but no existing approach
satisfies both of these requirements:
it can decide based on the recipient's process, and
it can sleep so that the LSM hook can query userspace for an access decision.
Thus, I'd like to add one new LSM hook in net/core/datagram.c and
change return type of one existing LSM hook in net/socket.c .

Regards.

-
Subject: Add packet filtering based on process's security context.

This patch allows LSM modules to filter incoming connections/datagrams
based on the security context of the process that is attempting to pick them up.

There are already hooks to filter incoming connections/datagrams
based on the socket's security context, but these hooks are not
applicable when one wants to do TCP Wrapper-like filtering
(e.g. App1 is permitted to accept TCP connections from 192.168.0.0/16).

Precautions: This approach has a side effect, although it is unlikely to occur.

If a socket is shared by multiple processes with different policy,
the process who should be able to accept this connection
will not be able to accept this connection
because socket_post_accept() aborts this connection.
But if socket_post_accept() doesn't abort this connection,
the process who must not be able to accept this connection
will repeat accept() forever, which is a worse side effect.

Similarly, if a socket is shared by multiple processes with different policy,
the process who should be able to pick up this datagram
will not be able to pick up this datagram
because socket_post_recv_datagram() discards this datagram.
But if socket_post_recv_datagram() doesn't discard this datagram,
the process who must not be able to pick up this datagram
will repeat recvmsg() forever, which is a worse side effect.

So, don't give different permissions to processes that share one socket.
Otherwise, some connections/datagrams cannot be delivered to intended process.

Signed-off-by: Kentaro Takeda [EMAIL PROTECTED]
Signed-off-by: Tetsuo Handa [EMAIL PROTECTED]

 include/linux/security.h |   34 +-
 net/core/datagram.c  |   26 --
 net/socket.c |7 +--
 security/dummy.c |   13 ++---
 security/security.c  |   10 --
 5 files changed, 76 insertions(+), 14 deletions(-)

--- linux-2.6-mm.orig/include/linux/security.h
+++ linux-2.6-mm/include/linux/security.h
@@ -778,8 +778,12 @@ struct request_sock;
  * @socket_post_accept:
  * This hook allows a security module to copy security
  * information into the newly created socket's inode.
+ * This hook also allows a security module to filter connections
+ * from unwanted peers based on the process accepting this connection.
+ * The connection will be aborted if this hook returns nonzero.
  * @sock contains the listening socket structure.
  * @newsock contains the newly created server socket for connection.
+ * Return 0 if permission is granted.
  * @socket_sendmsg:
  * Check permission before transmitting a message to another socket.
  * @sock contains the socket structure.
@@ -793,6 +797,15 @@ struct request_sock;
  * @size contains the size of message structure.
  * @flags contains the operational flags.
  * Return 0 if permission is granted.  
+ * @socket_post_recv_datagram:
+ * Check permission after receiving a datagram.
+ * This hook allows a security module to filter packets
+ * from unwanted peers based on the process receiving this datagram.
+ * The packet will be discarded if this hook returns nonzero.
+ * @sk contains the socket.
+ * @skb contains the socket buffer (may be NULL).
+ * @flags contains the operational flags.
+ * Return 0 if permission is granted.
  * @socket_getsockname:
  * Check permission before the local address (name) of the socket object
  * @sock is retrieved.
@@ -1389,12 +1402,13 @@ struct security_operations {
   struct sockaddr * address, int addrlen);
int (*socket_listen) (struct socket * sock, int backlog);
int (*socket_accept) (struct socket * sock, struct socket * newsock);
-   void (*socket_post_accept) (struct socket * sock,
-   struct socket * newsock);
+   int (*socket_post_accept) (struct socket *sock, struct socket *newsock);
int (*socket_sendmsg) (struct socket * sock,
   struct msghdr * msg, int size);
int (*socket_recvmsg) (struct socket * sock,
   struct msghdr * msg, int size, int flags);
+   int (*socket_post_recv_datagram) (struct sock *sk, struct sk_buff *skb,
+ unsigned int flags);
int (*socket_getsockname) (struct socket * sock);
int (*socket_getpeername) (struct socket *

[RFC/PATCH] SO_NO_CHECK for IPv6

2007-11-21 Thread Jeff Garzik


SO_NO_CHECK support for IPv6 appeared to be missing. This is presented,
based on a reading of net/ipv4/udp.c.

I wonder if IPv4's CHECKSUM_PARTIAL check from udp_push_pending_frames()
also needs to be copied to IPv6?

Signed-off-by: Jeff Garzik [EMAIL PROTECTED]
---
 net/ipv6/udp.c |   10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ee1cc3f..7927e69 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -538,9 +538,14 @@ static int udp_v6_push_pending_frames(struct sock *sk)
uh-len = htons(up-len);
uh-check = 0;
 
-   if (up-pcflag)
+   if (up-pcflag) /* UDP-Lite  */
csum = udplite_csum_outgoing(sk, skb);
-else
+
+   else if (sk-sk_no_check == UDP_CSUM_NOXMIT) {  /* UDP csum disabled */
+   skb-ip_summed = CHECKSUM_NONE;
+   goto send;
+
+   } else
csum = udp_csum_outgoing(sk, skb);
 
/* add protocol-dependent pseudo-header */
@@ -549,6 +554,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
if (uh-check == 0)
uh-check = CSUM_MANGLED_0;
 
+send:
err = ip6_push_pending_frames(sk);
 out:
up-len = 0;
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] amd8111e: don't call napi_enable if configured w/o NAPI

2007-11-21 Thread Jiri Bohac

The amd8111e network driver was broken by 
bea3348eef27e6044b6161fd04c3152215f96411, which makes the driver
call napi_enable() and napi_disable() even if the driver had been 
configured without CONFIG_AMD8111E_NAPI, and thus
netif_napi_add() had not been called on initialization.
This triggers a BUG in napi_enable().

This patch fixes the problem. Please apply.

Signed-off-by: Jiri Bohac [EMAIL PROTECTED]


diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c
index eebf5bb..e7fdd81 100644
--- a/drivers/net/amd8111e.c
+++ b/drivers/net/amd8111e.c
@@ -1340,7 +1340,9 @@ static int amd8111e_close(struct net_device * dev)
struct amd8111e_priv *lp = netdev_priv(dev);
netif_stop_queue(dev);
 
+#ifdef CONFIG_AMD8111E_NAPI
napi_disable(lp-napi);
+#endif
 
spin_lock_irq(lp-lock);
 
@@ -1372,7 +1374,9 @@ static int amd8111e_open(struct net_device * dev )
 dev-name, dev))
return -EAGAIN;
 
+#ifdef CONFIG_AMD8111E_NAPI
napi_enable(lp-napi);
+#endif
 
spin_lock_irq(lp-lock);
 
@@ -1380,7 +1384,9 @@ static int amd8111e_open(struct net_device * dev )
 
if(amd8111e_restart(dev)){
spin_unlock_irq(lp-lock);
+#ifdef CONFIG_AMD8111E_NAPI
napi_disable(lp-napi);
+#endif
if (dev-irq)
free_irq(dev-irq, dev);
return -ENOMEM;




-- 
Jiri Bohac [EMAIL PROTECTED]
SUSE Labs, SUSE CZ

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: ZD1211RW unaligned accesses...

2007-11-21 Thread Shaddy Baddah


Hi David,

David Miller wrote:

Shaddy I attach a hack patch that you can use which should get
rid of the warnings.


It hasn't seemed to. I patched the source (confirming the patched lines 
are in), compiled, installed and rebooted to effect the changes. My 
zd1211rw module's timestamp indicates that I have an updated module:


$ ls -l /lib/modules/2.6.22/kernel/drivers/net/wireless/zd1211rw/zd1211rw.ko
-rw-r--r-- 1 root root 84536 2007-11-21 23:18 
/lib/modules/2.6.22/kernel/drivers/net/wireless/zd1211rw/zd1211rw.ko


lsmod confirms the module is loaded. After activating the interface 
(without configuring it yet):


$ ifconfig eth2 up

I start getting the messages over and over on the console:

Kernel unaligned access at TPC[100ee624] do_rx+0x394/0x5ec [zd1211rw]
Kernel unaligned access at TPC[100ee62c] do_rx+0x39c/0x5ec [zd1211rw]
Kernel unaligned access at TPC[100ee638] do_rx+0x3a8/0x5ec [zd1211rw]

Sorry that this has not been successful this time, but thanks for your 
help. I will be trying to follow-up on some of the other questions put 
to me.


Regards,
Shaddy
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC/PATCH] SO_NO_CHECK for IPv6

2007-11-21 Thread YOSHIFUJI Hideaki / 吉藤英明

In article [EMAIL PROTECTED] (at Wed, 21 Nov 2007 07:45:32 -0500), Jeff 
Garzik [EMAIL PROTECTED] says:

 
 SO_NO_CHECK support for IPv6 appeared to be missing. This is presented,
 based on a reading of net/ipv4/udp.c.

Disagree. UDP checksum is mandatory in IPv6.

--yoshfuji
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC/PATCH] SO_NO_CHECK for IPv6

2007-11-21 Thread Herbert Xu

On Wed, Nov 21, 2007 at 01:20:51PM +, YOSHIFUJI Hideaki / 吉藤英明 wrote:
 In article [EMAIL PROTECTED] (at Wed, 21 Nov 2007 07:45:32 -0500), Jeff 
 Garzik [EMAIL PROTECTED] says:
 
  
  SO_NO_CHECK support for IPv6 appeared to be missing. This is presented,
  based on a reading of net/ipv4/udp.c.
 
 Disagree. UDP checksum is mandatory in IPv6.

Right, IPv6 doesn't have a header checksum so the UDP checksum
must be there.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] dm9000 - fix spinlock issue, updated

2007-11-21 Thread dmitry pervushin

The patch below fixes the problem with the dm9000_timeout function: it calls
dm9000_init under the spin_lock db->lock, which was going to be acquired
again in dm9000_hash_table. On the other hand, dm9000_hash_table has
to be called with db->lock held.

Signed-off-by: dmitry pervushin [EMAIL PROTECTED]
Index: linux/drivers/net/dm9000.c
===
--- linux.orig/drivers/net/dm9000.c
+++ linux/drivers/net/dm9000.c
@@ -173,6 +173,7 @@ static void dm9000_phy_write(struct net_
 static u16 read_srom_word(board_info_t *, int);
 static void dm9000_rx(struct net_device *);
 static void dm9000_hash_table(struct net_device *);
+static void dm9000_set_multicast(struct net_device *dev);
 
 //#define DM9000_PROGRAM_EEPROM
 #ifdef DM9000_PROGRAM_EEPROM
@@ -556,7 +557,7 @@ dm9000_probe(struct platform_device *pde
ndev-tx_timeout = dm9000_timeout;
ndev-watchdog_timeo = msecs_to_jiffies(watchdog);
ndev-stop   = dm9000_stop;
-   ndev-set_multicast_list = dm9000_hash_table;
+   ndev-set_multicast_list = dm9000_set_multicast;
 #ifdef CONFIG_NET_POLL_CONTROLLER
ndev-poll_controller= dm9000_poll_controller;
 #endif
@@ -620,6 +621,7 @@ static int
 dm9000_open(struct net_device *dev)
 {
board_info_t *db = (board_info_t *) dev-priv;
+   unsigned long flags;
 
PRINTK2(entering dm9000_open\n);
 
@@ -627,8 +629,10 @@ dm9000_open(struct net_device *dev)
return -EAGAIN;
 
/* Initialize DM9000 board */
+   spin_lock_irqsave(db-lock, flags);
dm9000_reset(db);
dm9000_init_dm9000(dev);
+   spin_unlock_irqrestore(db-lock, flags);
 
/* Init driver variable */
db-dbug_cnt = 0;
@@ -1030,6 +1034,18 @@ cal_CRC(unsigned char *Data, unsigned in
 /*
  *  Set DM9000 multicast address
  */
+
+static void
+dm9000_set_multicast(struct net_device *dev)
+{
+   board_info_t *db = (board_info_t *) dev-priv;
+   unsigned long flags;
+
+   spin_lock_irqsave(db-lock,flags);
+   dm9000_hash_table(dev);
+   spin_unlock_irqrestore(db-lock, flags);
+}
+
 static void
 dm9000_hash_table(struct net_device *dev)
 {
@@ -1038,12 +1054,9 @@ dm9000_hash_table(struct net_device *dev
int mc_cnt = dev-mc_count;
u32 hash_val;
u16 i, oft, hash_table[4];
-   unsigned long flags;
 
PRINTK2(dm9000_hash_table()\n);
 
-   spin_lock_irqsave(db-lock,flags);
-
for (i = 0, oft = 0x10; i  6; i++, oft++)
iow(db, oft, dev-dev_addr[i]);
 
@@ -1065,8 +1078,6 @@ dm9000_hash_table(struct net_device *dev
iow(db, oft++, hash_table[i]  0xff);
iow(db, oft++, (hash_table[i]  8)  0xff);
}
-
-   spin_unlock_irqrestore(db-lock,flags);
 }
 
 
@@ -1155,12 +1166,15 @@ dm9000_drv_resume(struct platform_device
 {
struct net_device *ndev = platform_get_drvdata(dev);
board_info_t *db = (board_info_t *) ndev-priv;
+   unsigned long flags;
 
if (ndev) {
 
if (netif_running(ndev)) {
+   spin_lock_irqsave(db-lock, flags);
dm9000_reset(db);
dm9000_init_dm9000(ndev);
+   spin_unlock_irqrestore(db-lock, flags);
 
netif_device_attach(ndev);
}


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] [LIB]: Introduce struct pcounter

2007-11-21 Thread Herbert Xu

On Wed, Nov 07, 2007 at 04:16:15PM +, Arnaldo Carvalho de Melo wrote:
 This just generalises what was introduced by Eric Dumazet for the struct proto
 inuse field in 286ab3d46058840d68e5d7d52e316c1f7e98c59f:
 
 [NET]: Define infrastructure to keep 'inuse' changes in an efficent 
 SMP/NUMA way.
 
 Please look at the comment in there to see the rationale.
 
 Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]

Both patches applied.  Thanks Arnaldo!

This patch had some trailing white spaces picked up by git which
I've fixed.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] smc911x: Fix unused variable warning.

2007-11-21 Thread Peter Korsgaard

The smc911x_local pointer in smc911x_rcv is only used in the SMC_USE_DMA
case. Move it under the #ifdef so GCC doesn't generate a warning in the
non-DMA case.
---
 drivers/net/smc911x.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c
index dd18af0..69a78b3 100644
--- a/drivers/net/smc911x.c
+++ b/drivers/net/smc911x.c
@@ -428,7 +428,6 @@ static inline void smc911x_drop_pkt(struct net_device *dev)
  */
 static inline void  smc911x_rcv(struct net_device *dev)
 {
-   struct smc911x_local *lp = netdev_priv(dev);
unsigned long ioaddr = dev-base_addr;
unsigned int pkt_len, status;
struct sk_buff *skb;
@@ -473,6 +472,7 @@ static inline void   smc911x_rcv(struct net_device *dev)
skb_put(skb,pkt_len-4);
 #ifdef SMC_USE_DMA
{
+   struct smc911x_local *lp = netdev_priv(dev);
unsigned int fifo;
/* Lower the FIFO threshold if possible */
fifo = SMC_GET_FIFO_INT();
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] smc911x: Fix undefined CONFIG_ symbol warning

2007-11-21 Thread Peter Korsgaard

elif defined(CONFIG_*) should be used instead of elif CONFIG_*
so GCC doesn't give warnings about undefined symbols when the config
option is disabled.
---
 drivers/net/smc911x.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/smc911x.h b/drivers/net/smc911x.h
index 16a0edc..d04e4fa 100644
--- a/drivers/net/smc911x.h
+++ b/drivers/net/smc911x.h
@@ -37,7 +37,7 @@
   #define SMC_USE_16BIT0
   #define SMC_USE_32BIT1
   #define SMC_IRQ_SENSEIRQF_TRIGGER_FALLING
-#elif CONFIG_SH_MAGIC_PANEL_R2
+#elif defined(CONFIG_SH_MAGIC_PANEL_R2)
   #define SMC_USE_SH_DMA   0
   #define SMC_USE_16BIT0
   #define SMC_USE_32BIT1
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] smc911x: Fix undefined CONFIG_ symbol warning

2007-11-21 Thread Peter Korsgaard

elif defined(CONFIG_*) should be used instead of elif CONFIG_*
so GCC doesn't give warnings about undefined symbols when the config
option is disabled.

Signed-off-by: Peter Korsgaard [EMAIL PROTECTED]
---
Sigh, forgot --signoff :/

 drivers/net/smc911x.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/smc911x.h b/drivers/net/smc911x.h
index 16a0edc..d04e4fa 100644
--- a/drivers/net/smc911x.h
+++ b/drivers/net/smc911x.h
@@ -37,7 +37,7 @@
   #define SMC_USE_16BIT0
   #define SMC_USE_32BIT1
   #define SMC_IRQ_SENSEIRQF_TRIGGER_FALLING
-#elif CONFIG_SH_MAGIC_PANEL_R2
+#elif defined(CONFIG_SH_MAGIC_PANEL_R2)
   #define SMC_USE_SH_DMA   0
   #define SMC_USE_16BIT0
   #define SMC_USE_32BIT1
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] smc911x: Fix unused variable warning.

2007-11-21 Thread Peter Korsgaard

The smc911x_local pointer in smc911x_rcv is only used in the SMC_USE_DMA
case. Move it under the #ifdef so GCC doesn't generate a warning in the
non-DMA case.

Signed-off-by: Peter Korsgaard [EMAIL PROTECTED]
---
 drivers/net/smc911x.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c
index dd18af0..69a78b3 100644
--- a/drivers/net/smc911x.c
+++ b/drivers/net/smc911x.c
@@ -428,7 +428,6 @@ static inline void smc911x_drop_pkt(struct net_device *dev)
  */
 static inline void  smc911x_rcv(struct net_device *dev)
 {
-   struct smc911x_local *lp = netdev_priv(dev);
unsigned long ioaddr = dev-base_addr;
unsigned int pkt_len, status;
struct sk_buff *skb;
@@ -473,6 +472,7 @@ static inline void   smc911x_rcv(struct net_device *dev)
skb_put(skb,pkt_len-4);
 #ifdef SMC_USE_DMA
{
+   struct smc911x_local *lp = netdev_priv(dev);
unsigned int fifo;
/* Lower the FIFO threshold if possible */
fifo = SMC_GET_FIFO_INT();
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: HTB/HSFC shaping precision

2007-11-21 Thread jamal

On Wed, 2007-21-11 at 12:31 +0200, Denys Fedoryshchenko wrote:
 On Wed, 21 Nov 2007 10:47:10 +0100, Jarek Poplawski wrote
  
  But, if you have full control on your side, it looks like a kind of
  realtime traffic, and then HFSC should be more appropriate for this
  (but I only 'heard' about this).
 
 One message later, thats what i dreamed about :-)
 Subject: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module 
 On website they have very good explanation... 
 http://www.gridmpi.org/gridtcp.jsp

That looks interesting - without reading the papers a few questions are
developing in my brain cells; for example it looks very similar to what
the chelsio NICs claim to do (which could be a good thing for TCP).
Whenever i see someone implementing something in hardware, i always get
flushes of patents. 

Denys, one of the things i have noticed with iperf is it tries to be
clever and probe the available bandwidth first. So you may not get the
most optimal use of your bandwidth. Try something like pktgen, it's
quite accurate in its measurements. Just add a tc drop rule on the
receiver to get the accounting.

cheers,
jamal

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module

2007-11-21 Thread jamal

On Wed, 2007-21-11 at 19:18 +0900, Ryousei Takano wrote:
 This patch includes the PSPacer (Precise Software Pacer) qdisc
 module, which achieves precise transmission bandwidth control.
 You can find more information at the project web page
 (http://www.gridmpi.org/gridtcp.jsp).

Good stuff.
I have not read your paper - There are NICs out there (chelsio comes to
mind) which claim to do pacing and have shown impressive numbers with
TCP. Is your approach similar? Are there patents involved by some of
these hardware vendors? (It would not be surprising if they exist).

The advantage with NICs is they have very good control of the timing
(clock granularity being extremely important in cases like this) - what
were your measurements based on i.e what clock source did you use on
Linux?
Also, the idea of using a PAUSE frame to add gaps is interesting, but
you should note that in linux a qdisc may be attached to any network
device and this, for example, may be a PPP device etc. What would you use
for gaps in that case? 
I apologize if the answers are in your papers - i just glossed over.

cheers,
jamal 

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/8] ibm_newemac: Fix possible lockup on close

2007-11-21 Thread Christoph Hellwig

On Wed, Nov 21, 2007 at 05:06:39PM +1100, Benjamin Herrenschmidt wrote:
 It's a bad idea to call flush_scheduled_work from within a
 netdev->stop because the linkwatch will occasionally take the
 rtnl lock from a workqueue context, and thus that can deadlock.
 
 This reworks things a bit in that area to avoid the problem.

So from the name of the driver you want to keep the previous emac
driver around.  Is there a good reason for that?

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/8] ibm_newemac: Fix possible lockup on close

2007-11-21 Thread Josh Boyer

On Wed, 21 Nov 2007 16:41:23 +0100
Christoph Hellwig [EMAIL PROTECTED] wrote:

 On Wed, Nov 21, 2007 at 05:06:39PM +1100, Benjamin Herrenschmidt wrote:
  It's a bad idea to call flush_scheduled_work from within a
  netdev->stop because the linkwatch will occasionally take the
  rtnl lock from a workqueue context, and thus that can deadlock.
  
  This reworks things a bit in that area to avoid the problem.
 
 So from the name of the driver you want to keep the previous emac
 driver around.  Is there a good reason for that?

It's being kept around until arch/ppc dies.  Then things should get
renamed.

josh
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCHv6 0/3] Interface group patches

2007-11-21 Thread Balazs Scheidler

On Wed, 2007-11-21 at 01:25 +0100, Patrick McHardy wrote:
 David Miller wrote:
  From: Laszlo Attila Toth [EMAIL PROTECTED]
  Date: Tue, 20 Nov 2007 14:52:12 +0100

  Jan Engelhardt írta:
  On Nov 20 2007 14:14, Laszlo Attila Toth wrote:
  This is the 6th version of our interface group patches.

  The interface group value can be used to manage different interfaces
  at the same time such as in netfilter/iptables.
  I take it you could not use...?
iptables -i iif1 -j dosomething
iptables -i iif2 -j dosomething
  This kind of usage requires static interface names. But there are 
  dynamic interfaces such as ppp, where the actual name is not always 
  known or sometimes they exist sometimes not. It is difficult to use 
  iptables this way, and every ifup/ifdown requires change in the iptables 
  ruleset (donwload it, modify and upload to the kernel). It may be too slow.

  This is actually not true these days.

  When network devices are created user events are generated and the
  user can rename the device however they like using a mapping table of
  any kind.

  And at such point the problem you present doesn't actually exist, you
  can know what the device will be named.

  And if rule loading dynamically is slow, we should fix that instead of
  creating infrastructure and interfaces we don't actually need.

 I actually like this feature. Matching on names in iptables
 has always been one of the major bottlenecks, taking
 (according to my last measurement, which is some time ago)
 about 1-2% of the total performance. This is of course in
 large parts because the interface match is present on *every*
 rule, but still some way to logically group interfaces seems
 useful to me, not only for iptables, but also for routing rules,
 traffic classifiers, af_packet sockets etc.

 I'm working on the incremental ruleset changing API BTW :)
 One of the changes will be that interface matching is not
 a default part of every rule, and without wildcards it will
 use the ifindex. But since the cost of this feature seems
 pretty low, I don't see a compelling reason against it.

We are also using interface groups from userspace applications (hence
the netlink notification). 

ppp comes up, an interface is created according to the pppd
configuration, which then assigns the interface to the given group.
another application (a proxy based firewall in our example) listens to
this notification and binds to the new interface as well.

-- 
Bazsi

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module

2007-11-21 Thread Eric Dumazet


Ryousei Takano a écrit :

This patch includes the PSPacer (Precise Software Pacer) qdisc
module, which achieves precise transmission bandwidth control.
You can find more information at the project web page
(http://www.gridmpi.org/gridtcp.jsp).

Signed-off-by: Ryousei Takano [EMAIL PROTECTED]




+static struct sk_buff *alloc_gap_packet(struct Qdisc* sch, int size)
+{
+   struct sk_buff *skb;
+   struct net_device *dev = sch-dev;
+   unsigned char *pkt;
+   int pause_time = 0;
+   int pktsize = size + 2;
+
+   skb = alloc_skb(pktsize, GFP_ATOMIC);
+   if (!skb)
+   return NULL;
+
+   skb_reserve(skb, 2);


minor nit, but skb_reserve is not *needed* here.

skb_reserve() is used to align the IP header on a 16-byte boundary, and
we do it on the rx side to speed up the IP stack, at the cost of a possibly more
expensive DMA transfer.

Here you don't send an IP packet, do you?


+
+   pkt = skb-data;
+   memset(pkt, 0xff, pktsize);
+   pkt[0] = 0x01; /* dst address: 01:80:c2:00:00:01 */
+   pkt[1] = 0x80;
+   pkt[2] = 0xc2;
+   pkt[3] = 0x00;
+   pkt[4] = 0x00;
+   pkt[5] = 0x01;
+   memcpy(pkt + 6, dev-dev_addr, ETH_ALEN /* dev-addr_len */);
+
+   pkt[12] = 0x88; /* MAC control:88 08 */
+   pkt[13] = 0x08;
+   pkt[14] = 0;/* MAC control opcode: 00 01 */
+   pkt[15] = 1;
+   pkt[16] = pause_time  8;
+   pkt[17] = pause_time;
+
+   skb_put(skb, size);
+
+   skb-dev = sch-dev;


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC PATCH 0/2]: TCP MTUprobe fixes

2007-11-21 Thread Ilpo Järvinen

Hi all,  
   
Here are two other things in MTU probe code that caught my
attention while attempting to figure out the sk_send_head
usage there (sent patch to that earlier). The latter here is
not strictly speaking a fix but the original code has striking
complexity to perform a query which can be reduced to a simple
operation, thus I included it here as well. If these seem fine
to you as well, inclusion net-2.6 would be nice. Only compile
tested.
   
-- 
 i.


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC PATCH 2/2] [TCP] MTUprobe: Cleanup send queue check (no need to loop)

2007-11-21 Thread Ilpo Järvinen

The original code has striking complexity to perform a query
which can be reduced to a very simple compare.

FIN seqno may be included to write_seq but it should not make
any significant difference here compared to skb->len which was
used previously. One won't end up there with SYN still queued.

Use of write_seq check guarantees that there's a valid skb in
send_head so I removed the extra check.

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 net/ipv4/tcp_output.c |7 +--
 1 files changed, 1 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ff22ce8..1822ce6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1315,12 +1315,7 @@ static int tcp_mtu_probe(struct sock *sk)
}
 
/* Have enough data in the send queue to probe? */
-   len = 0;
-   if ((skb = tcp_send_head(sk)) == NULL)
-   return -1;
-   while ((len += skb-len)  size_needed  !tcp_skb_is_last(sk, skb))
-   skb = tcp_write_queue_next(sk, skb);
-   if (len  size_needed)
+   if (tp-write_seq - tp-snd_nxt  size_needed)
return -1;
 
if (tp-snd_wnd  size_needed)
-- 
1.5.0.6

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC PATCH 1/2] [TCP]: MTUprobe: receiver window data available checks fixed

2007-11-21 Thread Ilpo Järvinen

It seems that the checked range for receiver window check should
begin from the first rather than from the last skb that is going
to be included to the probe. And that can be achieved without
reference to skbs at all, snd_nxt and write_seq provides the
correct seqno already. Plus, it SHOULD account packets that are
necessary to trigger fast retransmit [RFC4821].

Location of snd_wnd < probe_size/size_needed check is bogus
because it will cause the other if() match as well (due to
snd_nxt >= snd_una invariant).

Removed dead obvious comment.

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
---
 net/ipv4/tcp_output.c |   17 -
 1 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 30d6737..ff22ce8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1289,6 +1289,7 @@ static int tcp_mtu_probe(struct sock *sk)
struct sk_buff *skb, *nskb, *next;
int len;
int probe_size;
+   int size_needed;
unsigned int pif;
int copy;
int mss_now;
@@ -1307,6 +1308,7 @@ static int tcp_mtu_probe(struct sock *sk)
/* Very simple search strategy: just double the MSS. */
mss_now = tcp_current_mss(sk, 0);
probe_size = 2*tp-mss_cache;
+   size_needed = probe_size + (tp-reordering + 1) * mss_now;
if (probe_size  tcp_mtu_to_mss(sk, icsk-icsk_mtup.search_high)) {
/* TODO: set timer for probe_converge_event */
return -1;
@@ -1316,18 +1318,15 @@ static int tcp_mtu_probe(struct sock *sk)
len = 0;
if ((skb = tcp_send_head(sk)) == NULL)
return -1;
-   while ((len += skb-len)  probe_size  !tcp_skb_is_last(sk, skb))
+   while ((len += skb-len)  size_needed  !tcp_skb_is_last(sk, skb))
skb = tcp_write_queue_next(sk, skb);
-   if (len  probe_size)
+   if (len  size_needed)
return -1;
 
-   /* Receive window check. */
-   if (after(TCP_SKB_CB(skb)-seq + probe_size, tp-snd_una + 
tp-snd_wnd)) {
-   if (tp-snd_wnd  probe_size)
-   return -1;
-   else
-   return 0;
-   }
+   if (tp-snd_wnd  size_needed)
+   return -1;
+   if (after(tp-snd_nxt + size_needed, tp-snd_una + tp-snd_wnd))
+   return 0;
 
/* Do we need to wait to drain cwnd? */
pif = tcp_packets_in_flight(tp);
-- 
1.5.0.6

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH][IRDA] Compilation for CONFIG_INET=n case

2007-11-21 Thread Pavel Emelyanov

Found this occasionally. 

The CONFIG_INET=n is hardly ever set, but if it is the 
irlan_eth_send_gratuitous_arp() compilation should produce a 
warning about unused variable in_dev.

Too pedantic? :)

Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED]

---

diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 7f9c854..c682207 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -296,6 +296,7 @@ void irlan_eth_flow_indication(void *instance, void *sap, 
LOCAL_FLOW flow)
  */
 void irlan_eth_send_gratuitous_arp(struct net_device *dev)
 {
+#ifdef CONFIG_INET
struct in_device *in_dev;
 
/*
@@ -303,7 +304,6 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev)
 * is useful if we have changed access points on the same
 * subnet.
 */
-#ifdef CONFIG_INET
IRDA_DEBUG(4, IrLAN: Sending gratuitous ARP\n);
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC PATCH 2/2] [TCP] MTUprobe: Cleanup send queue check (no need to loop)

2007-11-21 Thread John Heffner


Ilpo Järvinen wrote:

The original code has striking complexity to perform a query
which can be reduced to a very simple compare.

FIN seqno may be included to write_seq but it should not make
any significant difference here compared to skb->len which was
used previously. One won't end up there with SYN still queued.

Use of write_seq check guarantees that there's a valid skb in
send_head so I removed the extra check.

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]


Acked-by: John Heffner [EMAIL PROTECTED]



---
 net/ipv4/tcp_output.c |7 +--
 1 files changed, 1 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ff22ce8..1822ce6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1315,12 +1315,7 @@ static int tcp_mtu_probe(struct sock *sk)
}
 
 	/* Have enough data in the send queue to probe? */

-   len = 0;
-   if ((skb = tcp_send_head(sk)) == NULL)
-   return -1;
-   while ((len += skb-len)  size_needed  !tcp_skb_is_last(sk, skb))
-   skb = tcp_write_queue_next(sk, skb);
-   if (len  size_needed)
+   if (tp-write_seq - tp-snd_nxt  size_needed)
return -1;
 
 	if (tp-snd_wnd  size_needed)


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC PATCH 1/2] [TCP]: MTUprobe: receiver window data available checks fixed

2007-11-21 Thread John Heffner


Ilpo Järvinen wrote:

It seems that the checked range for receiver window check should
begin from the first rather than from the last skb that is going
to be included to the probe. And that can be achieved without
reference to skbs at all, snd_nxt and write_seq provides the
correct seqno already. Plus, it SHOULD account packets that are
necessary to trigger fast retransmit [RFC4821].

Location of snd_wnd < probe_size/size_needed check is bogus
because it will cause the other if() match as well (due to
snd_nxt >= snd_una invariant).

Removed dead obvious comment.

Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]


Acked-by: John Heffner [EMAIL PROTECTED]



---
 net/ipv4/tcp_output.c |   17 -
 1 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 30d6737..ff22ce8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1289,6 +1289,7 @@ static int tcp_mtu_probe(struct sock *sk)
struct sk_buff *skb, *nskb, *next;
int len;
int probe_size;
+   int size_needed;
unsigned int pif;
int copy;
int mss_now;
@@ -1307,6 +1308,7 @@ static int tcp_mtu_probe(struct sock *sk)
/* Very simple search strategy: just double the MSS. */
mss_now = tcp_current_mss(sk, 0);
probe_size = 2*tp-mss_cache;
+   size_needed = probe_size + (tp-reordering + 1) * mss_now;
if (probe_size  tcp_mtu_to_mss(sk, icsk-icsk_mtup.search_high)) {
/* TODO: set timer for probe_converge_event */
return -1;
@@ -1316,18 +1318,15 @@ static int tcp_mtu_probe(struct sock *sk)
len = 0;
if ((skb = tcp_send_head(sk)) == NULL)
return -1;
-   while ((len += skb-len)  probe_size  !tcp_skb_is_last(sk, skb))
+   while ((len += skb-len)  size_needed  !tcp_skb_is_last(sk, skb))
skb = tcp_write_queue_next(sk, skb);
-   if (len  probe_size)
+   if (len  size_needed)
return -1;
 
-	/* Receive window check. */

-   if (after(TCP_SKB_CB(skb)-seq + probe_size, tp-snd_una + 
tp-snd_wnd)) {
-   if (tp-snd_wnd  probe_size)
-   return -1;
-   else
-   return 0;
-   }
+   if (tp-snd_wnd  size_needed)
+   return -1;
+   if (after(tp-snd_nxt + size_needed, tp-snd_una + tp-snd_wnd))
+   return 0;
 
 	/* Do we need to wait to drain cwnd? */

pif = tcp_packets_in_flight(tp);


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/2][2.6.24] ehea: Reworked rcv queue handling to log only fatal errors

2007-11-21 Thread Thomas Klein

Prevent driver from brawly logging packet checksum errors.

Signed-off-by: Thomas Klein [EMAIL PROTECTED]

---
 drivers/net/ehea/ehea.h  |2 +-
 drivers/net/ehea/ehea_main.c |   11 +--
 drivers/net/ehea/ehea_qmr.h  |4 ++--
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index 5935899..ea67615 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -40,7 +40,7 @@
 #include asm/io.h
 
 #define DRV_NAME   ehea
-#define DRV_VERSIONEHEA_0082
+#define DRV_VERSIONEHEA_0083
 
 /* eHEA capability flags */
 #define DLPAR_PORT_ADD_REM 1
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index d2f715d..869e160 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -410,11 +410,6 @@ static int ehea_treat_poll_error(struct ehea_port_res *pr, 
int rq,
if (cqe-status  EHEA_CQE_STAT_ERR_CRC)
pr-p_stats.err_frame_crc++;
 
-   if (netif_msg_rx_err(pr-port)) {
-   ehea_error(CQE Error for QP %d, pr-qp-init_attr.qp_nr);
-   ehea_dump(cqe, sizeof(*cqe), CQE);
-   }
-
if (rq == 2) {
*processed_rq2 += 1;
skb = get_skb_by_index(pr-rq2_skba.arr, pr-rq2_skba.len, cqe);
@@ -426,7 +421,11 @@ static int ehea_treat_poll_error(struct ehea_port_res *pr, 
int rq,
}
 
if (cqe-status  EHEA_CQE_STAT_FAT_ERR_MASK) {
-   ehea_error(Critical receive error. Resetting port.);
+   if (netif_msg_rx_err(pr-port)) {
+   ehea_error(Critical receive error for QP %d. 
+  Resetting port., pr-qp-init_attr.qp_nr);
+   ehea_dump(cqe, sizeof(*cqe), CQE);
+   }
schedule_work(pr-port-reset_task);
return 1;
}
diff --git a/drivers/net/ehea/ehea_qmr.h b/drivers/net/ehea/ehea_qmr.h
index 562de0e..bc62d38 100644
--- a/drivers/net/ehea/ehea_qmr.h
+++ b/drivers/net/ehea/ehea_qmr.h
@@ -145,8 +145,8 @@ struct ehea_rwqe {
 #define EHEA_CQE_VLAN_TAG_XTRACT   0x0400
 
 #define EHEA_CQE_TYPE_RQ   0x60
-#define EHEA_CQE_STAT_ERR_MASK 0x720F
-#define EHEA_CQE_STAT_FAT_ERR_MASK 0x1F
+#define EHEA_CQE_STAT_ERR_MASK 0x700F
+#define EHEA_CQE_STAT_FAT_ERR_MASK 0xF
 #define EHEA_CQE_STAT_ERR_TCP  0x4000
 #define EHEA_CQE_STAT_ERR_IP   0x2000
 #define EHEA_CQE_STAT_ERR_CRC  0x1000
-- 
1.5.2
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/2][2.6.24] ehea: Improve tx packets counting

2007-11-21 Thread Thomas Klein

Using own tx_packets counter instead of firmware counters.

Signed-off-by: Thomas Klein [EMAIL PROTECTED]

---
 drivers/net/ehea/ehea.h  |2 +-
 drivers/net/ehea/ehea_main.c |9 +++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index f78e5bf..5935899 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -40,7 +40,7 @@
 #include asm/io.h
 
 #define DRV_NAME   ehea
-#define DRV_VERSIONEHEA_0080
+#define DRV_VERSIONEHEA_0082
 
 /* eHEA capability flags */
 #define DLPAR_PORT_ADD_REM 1
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index f0319f1..d2f715d 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -136,7 +136,7 @@ static struct net_device_stats *ehea_get_stats(struct 
net_device *dev)
struct ehea_port *port = netdev_priv(dev);
struct net_device_stats *stats = port-stats;
struct hcp_ehea_port_cb2 *cb2;
-   u64 hret, rx_packets;
+   u64 hret, rx_packets, tx_packets;
int i;
 
memset(stats, 0, sizeof(*stats));
@@ -162,7 +162,11 @@ static struct net_device_stats *ehea_get_stats(struct 
net_device *dev)
for (i = 0; i  port-num_def_qps; i++)
rx_packets += port-port_res[i].rx_packets;
 
-   stats-tx_packets = cb2-txucp + cb2-txmcp + cb2-txbcp;
+   tx_packets = 0;
+   for (i = 0; i  port-num_def_qps + port-num_add_tx_qps; i++)
+   tx_packets += port-port_res[i].tx_packets;
+
+   stats-tx_packets = tx_packets;
stats-multicast = cb2-rxmcp;
stats-rx_errors = cb2-rxuerr;
stats-rx_bytes = cb2-rxo;
@@ -2000,6 +2004,7 @@ static int ehea_start_xmit(struct sk_buff *skb, struct 
net_device *dev)
}
 
ehea_post_swqe(pr-qp, swqe);
+   pr-tx_packets++;
 
if (unlikely(atomic_read(pr-swqe_avail) = 1)) {
spin_lock_irqsave(pr-netif_queue, flags);
-- 
1.5.2
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH,RFC] ep93xx_eth: conversion to phylib framework

2007-11-21 Thread Andy Fleming



On Nov 16, 2007, at 03:38, Herbert Valerio Riedel wrote:


Currently, the ep93xx_eth driver doesn't care about the PHY state,
but it should, in order to tell the MAC when full duplex operation is
required; failure to do so causes degraded performance on full duplex
links. This patch implements proper PHY handling via the phylib  
framework:


 - clean up ep93xx_mdio_{read,write} to conform to ep93xx manual
 - convert ep93xx_eth driver to phylib framework
 - set full duplex bit in configuration of MAC when FDX link detected
 - convert to use print_mac()


Looks good to me.  My only comment is that we might want to have  
support for checking preamble suppression support in the PHY Lib,  
itself.


Acked-by: Andy Fleming [EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.

2007-11-21 Thread Anil Veerabhadrappa


  The sysfs bits related to the hba should be use one of the scsi sysfs 
  facilities or if they are related to iscsi bits and are generic then 
  through the iscsi hba
  
  bnx2i needs 2 sysfs entries -
  1. QP size info - this is used to size per connection shared data
  structures to issue work requests to chip (login, scsi cmd, tmf, nopin)
  and get completions from the chip (scsi completions, async messages,
  etc'). This is a iSCSI HBA attribute
  2. port mapper - we can be more flexible on classifying this as either
  iSCSI HBA attribute or bnx2i driver global attribute
  Can hooks be added to iSCSI transport class to include these?
  
 
 Which ones were they exactly? I think JamesB wanted only common 
 transport values in the transport class. If it is driver specific then 
 it should go on the host or target or device with the scsi_host_template 
 attrs.
 

It's a chicken & egg issue to put port mapper sysfs entry in scsi host
attributes. Application won't see sysfs unless initiator creates an
iSCSI session and driver can't create an iSCSI session without a tcp
port. I was wondering if there is a better way than using IOCTL in this
situation?

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.

2007-11-21 Thread James Smart



Anil Veerabhadrappa wrote:

It's a chicken & egg issue to put port mapper sysfs entry in scsi host
attributes. Application won't see sysfs unless initiator creates an
iSCSI session and driver can't create an iSCSI session without a tcp
port. I was wondering if there is a better way than using IOCTL in this
situation?


Agree, and IMHO, is why the scsi_host should have been bound to the ISID
or something similar (e.g. the initiator port that can have 1 or more
sessions), and the session bound to the scsi_target under the scsi_host.

-- james s
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC/PATCH] SO_NO_CHECK for IPv6

2007-11-21 Thread David Miller

From: Jeff Garzik [EMAIL PROTECTED]
Date: Wed, 21 Nov 2007 07:45:32 -0500

 SO_NO_CHECK support for IPv6 appeared to be missing. This is presented,
 based on a reading of net/ipv4/udp.c.

 I wonder if IPv4's CHECKSUM_PARTIAL check from udp_push_pending_frames()
 also needs to be copied to IPv6?

 Signed-off-by: Jeff Garzik [EMAIL PROTECTED]

IPV6 specifies that, unlike ipv4, this no-checksum behavior
is not allowed.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Routing tables associated with VLANs dissappear when parent ethX down/up

2007-11-21 Thread Ben Greear


For consideration, this patch seems to work for me.  I'm not sure
why we ever listened to these events.  I've only tested on a NIC that
doesn't support hw-accel at the moment..will test with e1000 later.

Thanks,
Ben

--
Ben Greear [EMAIL PROTECTED]
Candela Technologies Inc  http://www.candelatech.com

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index c4209c8..acbf0ff 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -615,6 +615,11 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		}
 		break;
 
+#if 0
+		/* Don't propagate management state from base dev to VLANs.  If you do this,
+		 * then if you 'ifconfig eth0 down; ifconfig eth0 up', you also lose all the
+		 * routes for eth0.* VLANs. --Ben
+		 */
 	case NETDEV_DOWN:
 		/* Put all VLANs for this dev in the down state too.  */
 		for (i = 0; i  VLAN_GROUP_ARRAY_LEN; i++) {
@@ -644,6 +649,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			dev_change_flags(vlandev, flgs | IFF_UP);
 		}
 		break;
+#endif
 		
 	case NETDEV_UNREGISTER:
 		/* Delete all VLANs for this dev. */

[PATCH] cassini: NAPI configuration

2007-11-21 Thread Stephen Hemminger

The Cassini driver has NAPI support, but it is not possible to configure it.
Compile tested only, no idea if it works (no hardware).
Get rid of warning from leftover variable in now visible code.

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]

--- a/drivers/net/Kconfig   2007-11-19 18:56:12.0 -0800
+++ b/drivers/net/Kconfig   2007-11-21 11:28:20.0 -0800
@@ -587,6 +587,15 @@ config CASSINI
  Support for the Sun Cassini chip, aka Sun GigaSwift Ethernet. See also
  
http://www.sun.com/products-n-solutions/hardware/docs/pdf/817-4341-10.pdf
 
+config CASSINI_NAPI
+   bool Use Rx Polling (NAPI) (EXPERIMENTAL)
+   depends on CASSINI  EXPERIMENTAL
+   help
+ NAPI is a new driver API designed to reduce CPU and interrupt load
+ when the driver is receiving lots of packets from the card.
+
+ If in doubt, say N.
+
 config SUNVNET
tristate Sun Virtual Network support
depends on SUN_LDOMS
--- a/drivers/net/cassini.c 2007-11-16 16:17:20.0 -0800
+++ b/drivers/net/cassini.c 2007-11-21 11:30:43.0 -0800
@@ -2611,7 +2611,7 @@ static int cas_poll(struct napi_struct *
 {
struct cas *cp = container_of(napi, struct cas, napi);
struct net_device *dev = cp-dev;
-   int i, enable_intr, todo, credits;
+   int i, enable_intr, credits;
u32 status = readl(cp-regs + REG_INTR_STATUS);
unsigned long flags;
 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/8] ibm_newemac: Fix possible lockup on close

2007-11-21 Thread Benjamin Herrenschmidt


On Wed, 2007-11-21 at 16:41 +0100, Christoph Hellwig wrote:
 On Wed, Nov 21, 2007 at 05:06:39PM +1100, Benjamin Herrenschmidt wrote:
  It's a bad idea to call flush_scheduled_work from within a
  netdev-stop because the linkwatch will occasionally take the
  rtnl lock from a workqueue context, and thus that can deadlock.
  
  This reworks things a bit in that area to avoid the problem.
 
 So from the name of the driver you want to keep the previous emac
 driver around.  Is there a good reason for that?

Until arch/ppc is gone... the previous driver works with arch/ppc the
new one with arch/powerpc.

If we kill arch/ppc in .25, then we'll remove the old driver and rename
the new one. If not, that will wait til .26

I'm hard at work porting as much of 4xx over I can to get to the point
where we -can- kill arch/ppc but I'm not done yet.

Cheers,
Ben.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Routing tables associated with VLANs dissappear when parent ethX down/up

2007-11-21 Thread Stephen Hemminger

On Wed, 21 Nov 2007 11:51:43 -0800
Ben Greear [EMAIL PROTECTED] wrote:

 For consideration, this patch seems to work for me.  I'm not sure
 why we ever listed to these events.  I've only tested on a NIC that
 doesn't support hw-accel at the moment..will test with e1000 later.
 
 Thanks,
 Ben
 

But then if you are doing bonding or bridging of vlan's 
and you bring down the root network device, the upper layer is not
notified (for failover).

-- 
Stephen Hemminger [EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Routing tables associated with VLANs dissappear when parent ethX down/up

2007-11-21 Thread Patrick McHardy


Stephen Hemminger wrote:

On Wed, 21 Nov 2007 11:51:43 -0800
Ben Greear [EMAIL PROTECTED] wrote:

  

For consideration, this patch seems to work for me.  I'm not sure
why we ever listed to these events.  I've only tested on a NIC that
doesn't support hw-accel at the moment..will test with e1000 later.

Thanks,
Ben




But then if you are doing bonding or bridging of vlan's 
and you bring down the root network device, the upper layer is not

notified (for failover).
  


operstate should be enough for this I guess. Ben, what does iproute show
for the vlan device when the lower device is down?

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Routing tables associated with VLANs dissappear when parent ethX down/up

2007-11-21 Thread Ben Greear


Patrick McHardy wrote:

Stephen Hemminger wrote:

On Wed, 21 Nov 2007 11:51:43 -0800
Ben Greear [EMAIL PROTECTED] wrote:

 

For consideration, this patch seems to work for me.  I'm not sure
why we ever listed to these events.  I've only tested on a NIC that
doesn't support hw-accel at the moment..will test with e1000 later.

Thanks,
Ben




But then if you are doing bonding or bridging of vlan's and you bring 
down the root network device, the upper layer is not

notified (for failover).
  


operstate should be enough for this I guess. Ben, what does iproute show
for the vlan device when the lower device is down?


It looks like it knows, assuming M-DOWN is useful information.
Eth2 is un-plugged, by the way.

[EMAIL PROTECTED] ~]# ifconfig eth2 up
[EMAIL PROTECTED] ~]# ip link show eth2.2
125: [EMAIL PROTECTED]: NO-CARRIER,BROADCAST,MULTICAST,UP mtu 1500 qdisc 
noqueue
link/ether 00:03:2d:08:33:47 brd ff:ff:ff:ff:ff:ff
[EMAIL PROTECTED] ~]# ifconfig eth2 down
[EMAIL PROTECTED] ~]# ip link show eth2.2
125: [EMAIL PROTECTED]: NO-CARRIER,BROADCAST,MULTICAST,UP,M-DOWN mtu 1500 
qdisc noqueue
link/ether 00:03:2d:08:33:47 brd ff:ff:ff:ff:ff:ff
[EMAIL PROTECTED] ~]#

Thanks,
Ben

--
Ben Greear [EMAIL PROTECTED]
Candela Technologies Inc  http://www.candelatech.com

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Routing tables associated with VLANs dissappear when parent ethX down/up

2007-11-21 Thread Patrick McHardy


Ben Greear wrote:

Patrick McHardy wrote:

Stephen Hemminger wrote:


But then if you are doing bonding or bridging of vlan's and you 
bring down the root network device, the upper layer is not

notified (for failover).
  


operstate should be enough for this I guess. Ben, what does iproute show
for the vlan device when the lower device is down?


It looks like it knows, assuming M-DOWN is useful information.
Eth2 is un-plugged, by the way.

[EMAIL PROTECTED] ~]# ifconfig eth2 up
[EMAIL PROTECTED] ~]# ip link show eth2.2
125: [EMAIL PROTECTED]: NO-CARRIER,BROADCAST,MULTICAST,UP mtu 1500 qdisc 
noqueue

link/ether 00:03:2d:08:33:47 brd ff:ff:ff:ff:ff:ff
[EMAIL PROTECTED] ~]# ifconfig eth2 down
[EMAIL PROTECTED] ~]# ip link show eth2.2
125: [EMAIL PROTECTED]: NO-CARRIER,BROADCAST,MULTICAST,UP,M-DOWN mtu 1500 
qdisc noqueue

link/ether 00:03:2d:08:33:47 brd ff:ff:ff:ff:ff:ff
[EMAIL PROTECTED] ~]# 


That comes from iproute itself, but the missing LOWER-UP flag
indicates it and that should be enough for bridging and bonding.
I'm unsure about this though since it's still a big difference in
userspace visible behaviour, people might just as well manually
configure failover once routing disappears or the device goes down,
or just have routing fall through to different routes. All this
wouldn't work anymore.

Maybe we can make this optional somehow without too much ugliness?

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Missing audit information in xfrm_audit_common_policyinfo()?

2007-11-21 Thread Paul Moore

I just noticed that the IPsec auditing code does not appear to audit the 
netmask for the selector source and destination addresses in 
xfrm_audit_common_policyinfo().  Before I threw a patch together I thought I 
would check to see if there was a reason for this that I am missing ...

-- 
paul moore
linux security @ hp
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Routing tables associated with VLANs dissappear when parent ethX down/up

2007-11-21 Thread Ben Greear


Patrick McHardy wrote:

Ben Greear wrote:

Patrick McHardy wrote:

Stephen Hemminger wrote:


But then if you are doing bonding or bridging of vlan's and you 
bring down the root network device, the upper layer is not

notified (for failover).
  


operstate should be enough for this I guess. Ben, what does iproute show
for the vlan device when the lower device is down?


It looks like it knows, assuming M-DOWN is useful information.
Eth2 is un-plugged, by the way.

[EMAIL PROTECTED] ~]# ifconfig eth2 up
[EMAIL PROTECTED] ~]# ip link show eth2.2
125: [EMAIL PROTECTED]: NO-CARRIER,BROADCAST,MULTICAST,UP mtu 1500 qdisc 
noqueue

link/ether 00:03:2d:08:33:47 brd ff:ff:ff:ff:ff:ff
[EMAIL PROTECTED] ~]# ifconfig eth2 down
[EMAIL PROTECTED] ~]# ip link show eth2.2
125: [EMAIL PROTECTED]: NO-CARRIER,BROADCAST,MULTICAST,UP,M-DOWN mtu 1500 
qdisc noqueue

link/ether 00:03:2d:08:33:47 brd ff:ff:ff:ff:ff:ff
[EMAIL PROTECTED] ~]# 


That comes from iproute itself, but the missing LOWER-UP flag
indicates it and that should be enough for bridging and bonding.
I'm unsure about this though since its still a big difference in
userspace visible behaviour, people might just as well manually
configure failover once routing disappears or the device goes down,
or just have routing fall through to different routes. All this
wouldn't work anymore.

Maybe we can make this optional somehow without too much uglyness?


I'm fine with that..we can just add a new vlan-device flag similar to the
reorder-header flag.

With the current code, on 'UP' of the underlying
code, all of the VLANs will also go UP, even if the user had previously
put them DOWN.  That seems like it could be quite dangerous/unexpected
to me..but I guess it's required if we are going to automatically DOWN them...

One other thought:  Maybe we could tell a small lie and say that we have
NO-CARRIER on the VLAN when the underlying device is down OR has no carrier?

That way we keep normal link up/down semantics w/out having to change the
admin state of the VLANs...

Thanks,
Ben

--
Ben Greear [EMAIL PROTECTED]
Candela Technologies Inc  http://www.candelatech.com

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Inconsistent lock state and possible irq lock inversion dependency detected in ax25.ko

2007-11-21 Thread Bernard Pidoux


Hi,

I am practicing intensively AX25 packet radio that uses ax25.ko together 
with mkiss, crc16, netrom, and rose modules using two PIII CPU Linux 
machines with 2.6.23.8 kernel.


On the first Linux machine I did not validate kernel hacking and AX25 
applications are running 100% of the time without serious problems.


On the second machine I validated kernel hacking and sooner or later I 
get exactly the same message after a connect timeout expires :


[ INFO: inconsistent lock state ]

The error seems to reside around ax25_disconnect+0x46/0xaf [ax25] that 
is called when an AX25 connect timeout or a connection failure occurs.

Connect timeout is probably activating
ax25_std_heartbeat_expiry+0x19/0xd3 [ax25]

The message is only displayed once on a boot session.

Ralf Baechle explained to me that ax25 code is very buggy and spinlocks 
difficult to trace.
However, as the cause of error is clearly identified and the reported 
address is constant, I suspect that an experienced programmer (which I 
am not) could trace the problem.


Moreover, I had the opportunity to catch a different message, that was 
longer than usual, and seem more explicit :


[ INFO: possible irq lock inversion dependency detected ]

Although the symptom is different it is related to the same origin :

fpac/4933 just changed the state of lock:
 (slock-AF_AX25){--..}, at: [d8be3312] ax25_disconnect+0x46/0xaf [ax25]

Whatever the running application is, the inconsistent lock state could be 
observed with ax25_call, flexd, fpac ax25 application programs.


Please find attached a few reports captured from dmesg after each event.

Could someone look at the listing and identify the origin of the problem, 
if unique?


Thanks.

Bernard Pidoux



=
[ INFO: possible irq lock inversion dependency detected ]
2.6.23.1 #1
-
fpac/4933 just changed the state of lock:
 (slock-AF_AX25){--..}, at: [d8be3312] ax25_disconnect+0x46/0xaf [ax25]
but this lock was taken by another, soft-irq-safe lock in the past:
 (ax25_list_lock){-+..}

and interrupts could create inverse lock ordering between them.


other info that might help us debug this:
no locks held by fpac/4933.

the first lock's dependencies:
- (slock-AF_AX25){--..} ops: 410 {
   initial-use  at:
[c012f448] mark_lock+0x5b/0x44b
[c0130358] __lock_acquire+0x4c2/0xc02
[c0130b06] lock_acquire+0x6e/0x87
[c024886b] lock_sock_nested+0x26/0xcc
[c02a37fb] _spin_lock_bh+0x2e/0x39
[c024886b] lock_sock_nested+0x26/0xcc
[c024886b] lock_sock_nested+0x26/0xcc
[c02462e0] sock_fasync+0x61/0x116
[c024727f] sock_close+0x22/0x2f
[c015c78b] __fput+0xbc/0x172
[c015a256] filp_close+0x51/0x58
[c0119daf] put_files_struct+0x5e/0xa6
[c011ae59] do_exit+0x22e/0x6d9
[c0103cc6] sysenter_past_esp+0x8f/0x99
[c012fa5e] trace_hardirqs_on+0x11f/0x148
[c011b36f] sys_exit_group+0x0/0xd
[c0103c96] sysenter_past_esp+0x5f/0x99
[] 0x
   softirq-on-W at:
[c0130a50] __lock_acquire+0xbba/0xc02
[c0130343] __lock_acquire+0x4ad/0xc02
[c011c8e7] local_bh_enable_ip+0xbd/0xc5
[c0130b06] lock_acquire+0x6e/0x87
[d8be3312] ax25_disconnect+0x46/0xaf [ax25]
[c02a37c2] _spin_lock+0x29/0x34
[d8be3312] ax25_disconnect+0x46/0xaf [ax25]
[d8be3312] ax25_disconnect+0x46/0xaf [ax25]
[d8be50c0] ax25_release+0x9d/0x182 [ax25]
[c0246e79] sock_release+0x14/0x56
[c0247287] sock_close+0x2a/0x2f
[c015c78b] __fput+0xbc/0x172
[c015a256] filp_close+0x51/0x58
[c015b284] sys_close+0x66/0x9d
[c0103c96] sysenter_past_esp+0x5f/0x99
[] 0x
   hardirq-on-W at:
[c012f448] mark_lock+0x5b/0x44b
[c013031e] __lock_acquire+0x488/0xc02
[c0130b06] lock_acquire+0x6e/0x87
[c024886b] lock_sock_nested+0x26/0xcc
[c02a37fb] _spin_lock_bh+0x2e/0x39
[c024886b] lock_sock_nested+0x26/0xcc
[c024886b] lock_sock_nested+0x26/0xcc
[c02462e0] sock_fasync+0x61/0x116
[c024727f]

Re: BUG: skge ethernet breakage (PCI: Unable to reserve mem region)

2007-11-21 Thread Stephen Hemminger

On Wed, 19 Sep 2007 22:57:49 +0200
Jan Gukelberger [EMAIL PROTECTED] wrote:

 Hi,
 
 seems as if there are currently no more ideas?
 
 So shall I perhaps open a bug in Kernel Bugzilla?
 
 Thanks,
 Jan
 
 On Tue, 2007-09-11 at 15:39 +0200, Jan Gukelberger wrote:
  On Tue, 2007-09-11 at 14:37 +0200, Stephen Hemminger wrote:
   On Tue, 11 Sep 2007 12:58:24 +0200
   Jan Gukelberger [EMAIL PROTECTED] wrote:
   
On Tue, 2007-09-11 at 10:21 +0200, Stephen Hemminger wrote:
 On Fri, 07 Sep 2007 18:42:35 +0200
 Jan Gukelberger [EMAIL PROTECTED] wrote:
[...]
  The key problem seem to be the following lines in dmesg:
  
  ACPI: PCI Interrupt :04:04.0[A] - GSI 19 (level, low) - IRQ 19
  PCI: Unable to reserve mem region #1:[EMAIL PROTECTED] for device 
  :04:04.0
  skge :04:04.0: cannot obtain PCI resources
  ACPI: PCI interrupt for device :04:04.0 disabled
  skge: probe of :04:04.0 failed with error -16
  
  
 
 There is some kind of device conflict, please provide lspci -vvvxx 
 output.

I'm attaching the output of 'lspci -vvvxx' on the working 2.6.20 kernel
as well as the output of 'lspci -vvxxx' on 2.6.23-rc5 which I recorded
earlier.
If you specifically need 'lspci -vvvxx' on 2.6.23-rc5 please drop me a
note and I'll reboot quickly.

Thanks,
Jan
   
   All looks in order, on the PCI tables. There is a firewire control just
   above the skge device, perhaps you enabled one of the firewire stacks
   in the configuration? 
  
  I did a quick diff of the respective kernel .config's (this is the
  configuration you mean, right?) and haven't found any notable
  differences in the firewire options.
  
Perhaps the console (dmesg) output will show some clue.
  
  I'm attaching a diff between dmesg of a working and a non-working boot.
  You can find the full dmesg records in my first mail and in the Debian
  BTS respectively.
  
  The only thing I can see there is the old  kernel having some problems
  with the SATA controller - even though I did never notice any unusual
  behaviour apart from these messages:
  
  PCI: Device :02:00.0 not available because of resource collisions
  ahci: probe of :02:00.0 failed with error -22
  JMB363: IDE controller at PCI slot :02:00.0
  PCI: Device :02:00.0 not available because of resource collisions
  ACPI: PCI Interrupt :02:00.0[A] - GSI 16 (level, low) - IRQ 16
  JMB363: BIOS configuration fixed.
  
  
  Don't know whether this could be related?
  
  Thanks,
  Jan
 

The problem is in the tables (ACPI) from the BIOS. So ACPI
driver (and/or BIOS) have to work out the resource assignments,
the driver really has nothing to do with it.

-- 
Stephen Hemminger [EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Routing tables associated with VLANs dissappear when parent ethX down/up

2007-11-21 Thread Patrick McHardy


Ben Greear wrote:

Patrick McHardy wrote:


That comes from iproute itself, but the missing LOWER-UP flag
indicates it and that should be enough for bridging and bonding.
I'm unsure about this though since its still a big difference in
userspace visible behaviour, people might just as well manually
configure failover once routing disappears or the device goes down,
or just have routing fall through to different routes. All this
wouldn't work anymore.

Maybe we can make this optional somehow without too much uglyness?


I'm fine with that..we can just add a new vlan-device flag similar to the
reorder-header flag.


An alternative to this would be something like Julian Anastasov's static
routes patch. Not sure if it has ever been considered for merging, but it's
a cleaner way than doing per-device hacks.

http://www.ssi.bg/~ja/



With the current code, on 'UP' of the underlying
code, all of the VLANs will also go UP, even if the user had previously
put them DOWN.  That seems like it could be quite dangerous/unexpected
to me..but I guess it's required if we are going to automatically DOWN 
them...


Yeah, I too never liked this behaviour.



One other thought:  Maybe we could tell a small lie and say that we have
NO-CARRIER on the VLAN when the underlying device is down OR has no 
carrier?


That way we keep normal link up/down semantics w/out having to change the
admin state of the VLANs...


That's pretty much what the operstate is doing, it should go to
IF_OPER_LOWERLAYERDOWN when the lower device is down. But as I
said above, people could actually rely on routes disappearing.


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] sky2: disable rx checksum on Yukon XL

2007-11-21 Thread Stephen Hemminger

The Marvell Yukon XL chipset appears to have a hardware glitch
where it will repeat the checksum of the last packet. Of course, this is
timing sensitive and only happens sometimes...

More info: http://bugzilla.kernel.org/show_bug.cgi?id=9381

As a workaround just disable hardware checksumming by default on
this chip version. The earlier workaround for PCIX, dual port
was also on Yukon XL so don't need to disable checksumming there.

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]

--- a/drivers/net/sky2.c2007-11-21 13:53:33.0 -0800
+++ b/drivers/net/sky2.c2007-11-21 13:59:09.0 -0800
@@ -1320,15 +1320,11 @@ static int sky2_up(struct net_device *de
 */
if (otherdev  netif_running(otherdev) 
(cap = pci_find_capability(hw-pdev, PCI_CAP_ID_PCIX))) {
-   struct sky2_port *osky2 = netdev_priv(otherdev);
u16 cmd;
 
pci_read_config_word(hw-pdev, cap + PCI_X_CMD, cmd);
cmd = ~PCI_X_CMD_MAX_SPLIT;
pci_write_config_word(hw-pdev, cap + PCI_X_CMD, cmd);
-
-   sky2-rx_csum = 0;
-   osky2-rx_csum = 0;
}
 
if (netif_msg_ifup(sky2))
@@ -4013,7 +4009,7 @@ static __devinit struct net_device *sky2
sky2-duplex = -1;
sky2-speed = -1;
sky2-advertising = sky2_supported_modes(hw);
-   sky2-rx_csum = 1;
+   sky2-rx_csum = (hw-chip_id != CHIP_ID_YUKON_XL);
sky2-wol = wol;
 
spin_lock_init(sky2-phy_lock);
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 3/4] tlan list is subscribers-only

2007-11-21 Thread akpm

From: Gabriel C [EMAIL PROTECTED]

Your mail to 'Tlan-devel' with the subject

drivers/net/tlan question

Is being held until the list moderator can review it for approval.

The reason it is being held:

Post by non-member to a members-only list

Signed-off-by: Gabriel Craciunescu [EMAIL PROTECTED]

Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 MAINTAINERS |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff -puN MAINTAINERS~tlan-list-is-subscribers-only MAINTAINERS
--- a/MAINTAINERS~tlan-list-is-subscribers-only
+++ a/MAINTAINERS
@@ -3733,7 +3733,7 @@ S:Maintained
 TLAN NETWORK DRIVER
 P: Samuel Chessman
 M: [EMAIL PROTECTED]
-L: [EMAIL PROTECTED]
+L: [EMAIL PROTECTED] (subscribers-only)
 W: http://sourceforge.net/projects/tlan/
 S: Maintained
 
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 4/4] Net: sunrpc, remove SPIN_LOCK_UNLOCKED

2007-11-21 Thread akpm

From: Jiri Slaby [EMAIL PROTECTED]

sunrpc, remove SPIN_LOCK_UNLOCKED

SPIN_LOCK_UNLOCKED is deprecated, use DEFINE_SPINLOCK instead

Signed-off-by: Jiri Slaby [EMAIL PROTECTED]
Cc: David S. Miller [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 net/sunrpc/xprt.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff -puN net/sunrpc/xprt.c~net-sunrpc-remove-spin_lock_unlocked 
net/sunrpc/xprt.c
--- a/net/sunrpc/xprt.c~net-sunrpc-remove-spin_lock_unlocked
+++ a/net/sunrpc/xprt.c
@@ -62,7 +62,7 @@ static inline voiddo_xprt_reserve(struc
 static voidxprt_connect_status(struct rpc_task *task);
 static int  __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
 
-static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(xprt_list_lock);
 static LIST_HEAD(xprt_list);
 
 /*
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 1/4] pfkey: sending an SADB_GET responds with an SADB_GET

2007-11-21 Thread akpm

From: Charles Hardin [EMAIL PROTECTED]

Kernel needs to respond to an SADB_GET with the same message type to
conform to the RFC 2367 Section 3.1.5

Cc: David S. Miller [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 net/key/af_key.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff -puN net/key/af_key.c~pfkey-sending-an-sadb_get-responds-with-an-sadb_get 
net/key/af_key.c
--- a/net/key/af_key.c~pfkey-sending-an-sadb_get-responds-with-an-sadb_get
+++ a/net/key/af_key.c
@@ -1552,7 +1552,7 @@ static int pfkey_get(struct sock *sk, st
 
out_hdr = (struct sadb_msg *) out_skb-data;
out_hdr-sadb_msg_version = hdr-sadb_msg_version;
-   out_hdr-sadb_msg_type = SADB_DUMP;
+   out_hdr-sadb_msg_type = SADB_GET;
out_hdr-sadb_msg_satype = pfkey_proto2satype(proto);
out_hdr-sadb_msg_errno = 0;
out_hdr-sadb_msg_reserved = 0;
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 2/4] make sunrpc/xprtsock.c:xs_setup_{udp,tcp}() static

2007-11-21 Thread akpm

From: Adrian Bunk [EMAIL PROTECTED]

xs_setup_{udp,tcp}() can now become static.

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 include/linux/sunrpc/xprtsock.h |6 --
 net/sunrpc/xprtsock.c   |4 ++--
 2 files changed, 2 insertions(+), 8 deletions(-)

diff -puN 
include/linux/sunrpc/xprtsock.h~make-sunrpc-xprtsockcxs_setup_udptcp-static 
include/linux/sunrpc/xprtsock.h
--- 
a/include/linux/sunrpc/xprtsock.h~make-sunrpc-xprtsockcxs_setup_udptcp-static
+++ a/include/linux/sunrpc/xprtsock.h
@@ -9,12 +9,6 @@
 
 #ifdef __KERNEL__
 
-/*
- * Socket transport setup operations
- */
-struct rpc_xprt *xs_setup_udp(struct xprt_create *args);
-struct rpc_xprt *xs_setup_tcp(struct xprt_create *args);
-
 intinit_socket_xprt(void);
 void   cleanup_socket_xprt(void);
 
diff -puN net/sunrpc/xprtsock.c~make-sunrpc-xprtsockcxs_setup_udptcp-static 
net/sunrpc/xprtsock.c
--- a/net/sunrpc/xprtsock.c~make-sunrpc-xprtsockcxs_setup_udptcp-static
+++ a/net/sunrpc/xprtsock.c
@@ -1828,7 +1828,7 @@ static struct rpc_xprt *xs_setup_xprt(st
  * @args: rpc transport creation arguments
  *
  */
-struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
+static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 {
struct sockaddr *addr = args-dstaddr;
struct rpc_xprt *xprt;
@@ -1894,7 +1894,7 @@ struct rpc_xprt *xs_setup_udp(struct xpr
  * @args: rpc transport creation arguments
  *
  */
-struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
+static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 {
struct sockaddr *addr = args-dstaddr;
struct rpc_xprt *xprt;
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 5/8] pcmcia net: use roundup_pow_of_two() macro instead of grotesque loop

2007-11-21 Thread akpm

From: Robert P. J. Day [EMAIL PROTECTED]

Signed-off-by: Robert P. J. Day [EMAIL PROTECTED]
Cc: Jeff Garzik [EMAIL PROTECTED]
Cc: Dominik Brodowski [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 drivers/net/pcmcia/pcnet_cs.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff -puN 
drivers/net/pcmcia/pcnet_cs.c~pcmcia-net-use-roundup_pow_of_two-macro-instead-of-grotesque-loop
 drivers/net/pcmcia/pcnet_cs.c
--- 
a/drivers/net/pcmcia/pcnet_cs.c~pcmcia-net-use-roundup_pow_of_two-macro-instead-of-grotesque-loop
+++ a/drivers/net/pcmcia/pcnet_cs.c
@@ -38,6 +38,7 @@
 #include linux/delay.h
 #include linux/ethtool.h
 #include linux/netdevice.h
+#include linux/log2.h
 #include ../8390.h
 
 #include pcmcia/cs_types.h
@@ -1484,8 +1485,7 @@ static int setup_shmem_window(struct pcm
window_size = 32 * 1024;
 
 /* Make sure it's a power of two.  */
-while ((window_size  (window_size - 1)) != 0)
-   window_size += window_size  ~(window_size - 1);
+window_size = roundup_pow_of_two(window_size);
 
 /* Allocate a memory window */
 req.Attributes = WIN_DATA_WIDTH_16|WIN_MEMORY_TYPE_CM|WIN_ENABLE;
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 8/8] forcedeth boot delay fix

2007-11-21 Thread akpm

From: Ayaz Abdulla [EMAIL PROTECTED]

Fix a long boot delay in the forcedeth driver.  During initialization, the
timeout for the handshake between mgmt unit and driver can be very long. 
The patch reduces the timeout by eliminating an extra loop around the
timeout logic.

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=9308

Signed-off-by: Ayaz Abdulla [EMAIL PROTECTED]
Cc: Alex Howells [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 drivers/net/forcedeth.c |   22 +-
 1 file changed, 9 insertions(+), 13 deletions(-)

diff -puN drivers/net/forcedeth.c~forcedeth-boot-delay-fix 
drivers/net/forcedeth.c
--- a/drivers/net/forcedeth.c~forcedeth-boot-delay-fix
+++ a/drivers/net/forcedeth.c
@@ -5294,19 +5294,15 @@ static int __devinit nv_probe(struct pci
if (readl(base + NvRegTransmitterControl)  
NVREG_XMITCTL_SYNC_PHY_INIT) {
np-mac_in_use = readl(base + NvRegTransmitterControl) 
 NVREG_XMITCTL_MGMT_ST;
dprintk(KERN_INFO %s: mgmt unit is running. mac in use 
%x.\n, pci_name(pci_dev), np-mac_in_use);
-   for (i = 0; i  5000; i++) {
-   msleep(1);
-   if (nv_mgmt_acquire_sema(dev)) {
-   /* management unit setup the phy 
already? */
-   if ((readl(base + 
NvRegTransmitterControl)  NVREG_XMITCTL_SYNC_MASK) ==
-   NVREG_XMITCTL_SYNC_PHY_INIT) {
-   /* phy is inited by mgmt unit */
-   phyinitialized = 1;
-   dprintk(KERN_INFO %s: Phy 
already initialized by mgmt unit.\n, pci_name(pci_dev));
-   } else {
-   /* we need to init the phy */
-   }
-   break;
+   if (nv_mgmt_acquire_sema(dev)) {
+   /* management unit setup the phy already? */
+   if ((readl(base + NvRegTransmitterControl)  
NVREG_XMITCTL_SYNC_MASK) ==
+   NVREG_XMITCTL_SYNC_PHY_INIT) {
+   /* phy is inited by mgmt unit */
+   phyinitialized = 1;
+   dprintk(KERN_INFO %s: Phy already 
initialized by mgmt unit.\n, pci_name(pci_dev));
+   } else {
+   /* we need to init the phy */
}
}
}
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 3/8] ucc_geth-fix-build-break-introduced-by-commit-09f75cd7bf13720738e6a196cc0107ce9a5bd5a0-checkpatch-fixes

2007-11-21 Thread akpm

From: Andrew Morton [EMAIL PROTECTED]

Cc: David S. Miller [EMAIL PROTECTED]
Cc: Emil Medve [EMAIL PROTECTED]
Cc: Jeff Garzik [EMAIL PROTECTED]
Cc: Kumar Gala [EMAIL PROTECTED]
Cc: Li Yang [EMAIL PROTECTED]
Cc: Paul Mackerras [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 drivers/net/ucc_geth.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff -puN 
drivers/net/ucc_geth.c~ucc_geth-fix-build-break-introduced-by-commit-09f75cd7bf13720738e6a196cc0107ce9a5bd5a0-checkpatch-fixes
 drivers/net/ucc_geth.c
--- 
a/drivers/net/ucc_geth.c~ucc_geth-fix-build-break-introduced-by-commit-09f75cd7bf13720738e6a196cc0107ce9a5bd5a0-checkpatch-fixes
+++ a/drivers/net/ucc_geth.c
@@ -3443,7 +3443,7 @@ static int ucc_geth_rx(struct ucc_geth_p
u16 length, howmany = 0;
u32 bd_status;
u8 *bdBuffer;
-   struct net_device * dev;
+   struct net_device *dev;
 
ugeth_vdbg(%s: IN, __FUNCTION__);
 
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 4/8] drivers/net/chelsio/: #if 0 unused functions

2007-11-21 Thread akpm

From: Adrian Bunk [EMAIL PROTECTED]

This patch #if 0's the following unused functions:
- espi.c:t1_espi_set_misc_ctrl()
- sge.c:t1_sched_set_max_avail_bytes()
- sge.c:t1_sched_set_drain_bits_per_us()

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]
Cc: Jeff Garzik [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 drivers/net/chelsio/espi.c |2 ++
 drivers/net/chelsio/espi.h |1 -
 drivers/net/chelsio/sge.c  |4 
 drivers/net/chelsio/sge.h  |2 --
 4 files changed, 6 insertions(+), 3 deletions(-)

diff -puN drivers/net/chelsio/espi.c~drivers-net-chelsio-if-0-unused-functions 
drivers/net/chelsio/espi.c
--- a/drivers/net/chelsio/espi.c~drivers-net-chelsio-if-0-unused-functions
+++ a/drivers/net/chelsio/espi.c
@@ -297,6 +297,7 @@ struct peespi *t1_espi_create(adapter_t 
return espi;
 }
 
+#if 0
 void t1_espi_set_misc_ctrl(adapter_t *adapter, u32 val)
 {
struct peespi *espi = adapter-espi;
@@ -309,6 +310,7 @@ void t1_espi_set_misc_ctrl(adapter_t *ad
writel(espi-misc_ctrl, adapter-regs + A_ESPI_MISC_CONTROL);
spin_unlock(espi-lock);
 }
+#endif  /*  0  */
 
 u32 t1_espi_get_mon(adapter_t *adapter, u32 addr, u8 wait)
 {
diff -puN drivers/net/chelsio/espi.h~drivers-net-chelsio-if-0-unused-functions 
drivers/net/chelsio/espi.h
--- a/drivers/net/chelsio/espi.h~drivers-net-chelsio-if-0-unused-functions
+++ a/drivers/net/chelsio/espi.h
@@ -62,7 +62,6 @@ void t1_espi_intr_disable(struct peespi 
 int t1_espi_intr_handler(struct peespi *);
 const struct espi_intr_counts *t1_espi_get_intr_counts(struct peespi *espi);
 
-void t1_espi_set_misc_ctrl(adapter_t *adapter, u32 val);
 u32 t1_espi_get_mon(adapter_t *adapter, u32 addr, u8 wait);
 int t1_espi_get_mon_t204(adapter_t *, u32 *, u8);
 
diff -puN drivers/net/chelsio/sge.c~drivers-net-chelsio-if-0-unused-functions 
drivers/net/chelsio/sge.c
--- a/drivers/net/chelsio/sge.c~drivers-net-chelsio-if-0-unused-functions
+++ a/drivers/net/chelsio/sge.c
@@ -330,6 +330,8 @@ unsigned int t1_sched_update_parms(struc
return max_avail_segs * (p-mtu - 40);
 }
 
+#if 0
+
 /*
  * t1_sched_max_avail_bytes() tells the scheduler the maximum amount of
  * data that can be pushed per port.
@@ -357,6 +359,8 @@ void t1_sched_set_drain_bits_per_us(stru
t1_sched_update_parms(sge, port, 0, 0);
 }
 
+#endif  /*  0  */
+
 
 /*
  * get_clock() implements a ns clock (see ktime_get)
diff -puN drivers/net/chelsio/sge.h~drivers-net-chelsio-if-0-unused-functions 
drivers/net/chelsio/sge.h
--- a/drivers/net/chelsio/sge.h~drivers-net-chelsio-if-0-unused-functions
+++ a/drivers/net/chelsio/sge.h
@@ -89,8 +89,6 @@ void t1_sge_intr_disable(struct sge *);
 void t1_sge_intr_clear(struct sge *);
 const struct sge_intr_counts *t1_sge_get_intr_counts(const struct sge *sge);
 void t1_sge_get_port_stats(const struct sge *sge, int port, struct 
sge_port_stats *);
-void t1_sched_set_max_avail_bytes(struct sge *, unsigned int);
-void t1_sched_set_drain_bits_per_us(struct sge *, unsigned int, unsigned int);
 unsigned int t1_sched_update_parms(struct sge *, unsigned int, unsigned int,
   unsigned int);
 
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 2/8] forcedeth: fix MAC address detection on network card (regression in 2.6.23)

2007-11-21 Thread akpm

From: Michael Pyne [EMAIL PROTECTED]

Partially revert a change to mac address detection introduced to the forcedeth
driver.  The change was intended to correct mac address detection for newer
nVidia chipsets where the mac address was stored in reverse order.  One of
those chipsets appears to still have the mac address in reverse order (or at
least, it does on my system).

The change that broke mac address detection for my card was commit
ef756b3e56c68a4d76d9d7b9a73fa8f4f739180f forcedeth: mac address correct

My network card is an nVidia built-in Ethernet card, output from lspci as
follows (with text and numeric ids):
$ lspci | grep Ethernet
00:07.0 Bridge: nVidia Corporation MCP61 Ethernet (rev a2)
$ lspci -n | grep 07.0
00:07.0 0680: 10de:03ef (rev a2)

The vendor id is, of course, nVidia.  The device id corresponds to the
NVIDIA_NVENET_19 entry.

The included patch fixes the MAC address detection on my system.
Interestingly, the MAC address appears to be in the range reserved for my
motherboard manufacturer (Gigabyte) and not nVidia.

Signed-off-by: Michael J. Pyne [EMAIL PROTECTED]
Cc: Jeff Garzik [EMAIL PROTECTED]
Cc: Ayaz Abdulla [EMAIL PROTECTED]
Cc: [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 drivers/net/forcedeth.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff -puN 
drivers/net/forcedeth.c~forcedeth-fix-mac-address-detection-on-network-card-regression-in-2623
 drivers/net/forcedeth.c
--- 
a/drivers/net/forcedeth.c~forcedeth-fix-mac-address-detection-on-network-card-regression-in-2623
+++ a/drivers/net/forcedeth.c
@@ -,7 +,7 @@ static struct pci_device_id pci_tbl[] = 
},
{   /* MCP61 Ethernet Controller */
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 
PCI_DEVICE_ID_NVIDIA_NVENET_19),
-   .driver_data = 
DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR,
+   .driver_data = 
DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT,
},
{   /* MCP65 Ethernet Controller */
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 
PCI_DEVICE_ID_NVIDIA_NVENET_20),
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 7/8] Net: ibm_newemac, remove SPIN_LOCK_UNLOCKED

2007-11-21 Thread akpm

From: Jiri Slaby [EMAIL PROTECTED]

SPIN_LOCK_UNLOCKED is deprecated, use DEFINE_SPINLOCK instead

Signed-off-by: Jiri Slaby [EMAIL PROTECTED]
Cc: Jeff Garzik [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 drivers/net/ibm_newemac/debug.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff -puN 
drivers/net/ibm_newemac/debug.c~net-ibm_newemac-remove-spin_lock_unlocked 
drivers/net/ibm_newemac/debug.c
--- a/drivers/net/ibm_newemac/debug.c~net-ibm_newemac-remove-spin_lock_unlocked
+++ a/drivers/net/ibm_newemac/debug.c
@@ -21,7 +21,7 @@
 
 #include core.h
 
-static spinlock_t emac_dbg_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(emac_dbg_lock);
 
 static void emac_desc_dump(struct emac_instance *p)
 {
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 6/8] forcedeth: new mcp79 device ids

2007-11-21 Thread akpm

From: Ayaz Abdulla [EMAIL PROTECTED]

Add new device ids and features for mcp79 devices into the forcedeth driver.

Signed-off-by: Ayaz Abdulla [EMAIL PROTECTED]
Cc: Jeff Garzik [EMAIL PROTECTED]
Cc: Manfred Spraul [EMAIL PROTECTED]
Cc: [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 drivers/net/forcedeth.c |   16 
 include/linux/pci_ids.h |4 
 2 files changed, 20 insertions(+)

diff -puN drivers/net/forcedeth.c~forcedeth-new-mcp79-device-ids 
drivers/net/forcedeth.c
--- a/drivers/net/forcedeth.c~forcedeth-new-mcp79-device-ids
+++ a/drivers/net/forcedeth.c
@@ -5621,6 +5621,22 @@ static struct pci_device_id pci_tbl[] = 
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 
PCI_DEVICE_ID_NVIDIA_NVENET_35),
.driver_data = 
DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT,
},
+   {   /* MCP79 Ethernet Controller */
+   PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 
PCI_DEVICE_ID_NVIDIA_NVENET_36),
+   .driver_data = 
DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT,
+   },
+   {   /* MCP79 Ethernet Controller */
+   PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 
PCI_DEVICE_ID_NVIDIA_NVENET_37),
+   .driver_data = 
DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT,
+   },
+   {   /* MCP79 Ethernet Controller */
+   PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 
PCI_DEVICE_ID_NVIDIA_NVENET_38),
+   .driver_data = 
DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT,
+   },
+   {   /* MCP79 Ethernet Controller */
+   PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 
PCI_DEVICE_ID_NVIDIA_NVENET_39),
+   .driver_data = 
DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT,
+   },
{0,},
 };
 
diff -puN include/linux/pci_ids.h~forcedeth-new-mcp79-device-ids 
include/linux/pci_ids.h
--- a/include/linux/pci_ids.h~forcedeth-new-mcp79-device-ids
+++ a/include/linux/pci_ids.h
@@ -1237,6 +1237,10 @@
 #define PCI_DEVICE_ID_NVIDIA_NVENET_33  0x0761
 #define PCI_DEVICE_ID_NVIDIA_NVENET_34  0x0762
 #define PCI_DEVICE_ID_NVIDIA_NVENET_35  0x0763
+#define PCI_DEVICE_ID_NVIDIA_NVENET_36  0x0AB0
+#define PCI_DEVICE_ID_NVIDIA_NVENET_37  0x0AB1
+#define PCI_DEVICE_ID_NVIDIA_NVENET_38  0x0AB2
+#define PCI_DEVICE_ID_NVIDIA_NVENET_39  0x0AB3
 
 #define PCI_VENDOR_ID_IMS  0x10e0
 #define PCI_DEVICE_ID_IMS_TT1280x9128
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 2/5] bluetooth: uninlining

2007-11-21 Thread akpm

From: Andrew Morton [EMAIL PROTECTED]

Remove all those inlines which were either a) unneeded or b) increased code
size.

          text    data     bss     dec     hex filename
before:   6997      74       8    7079    1ba7 net/bluetooth/hidp/core.o
after:    6492      74       8    6574    19ae net/bluetooth/hidp/core.o

Cc: Marcel Holtmann [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 net/bluetooth/hidp/core.c |   30 +++---
 1 file changed, 19 insertions(+), 11 deletions(-)

diff -puN net/bluetooth/hidp/core.c~bluetooth-uninlining 
net/bluetooth/hidp/core.c
--- a/net/bluetooth/hidp/core.c~bluetooth-uninlining
+++ a/net/bluetooth/hidp/core.c
@@ -135,8 +135,8 @@ static void __hidp_copy_session(struct h
}
 }
 
-static inline int hidp_queue_event(struct hidp_session *session, struct 
input_dev *dev,
-   unsigned int type, unsigned int code, 
int value)
+static int hidp_queue_event(struct hidp_session *session, struct input_dev 
*dev,
+   unsigned int type, unsigned int code, int value)
 {
unsigned char newleds;
struct sk_buff *skb;
@@ -243,7 +243,8 @@ static void hidp_input_report(struct hid
input_sync(dev);
 }
 
-static inline int hidp_queue_report(struct hidp_session *session, unsigned 
char *data, int size)
+static int hidp_queue_report(struct hidp_session *session,
+   unsigned char *data, int size)
 {
struct sk_buff *skb;
 
@@ -287,7 +288,7 @@ static void hidp_idle_timeout(unsigned l
hidp_schedule(session);
 }
 
-static inline void hidp_set_timer(struct hidp_session *session)
+static void hidp_set_timer(struct hidp_session *session)
 {
if (session-idle_to  0)
mod_timer(session-timer, jiffies + HZ * session-idle_to);
@@ -332,7 +333,8 @@ static inline int hidp_send_ctrl_message
return err;
 }
 
-static inline void hidp_process_handshake(struct hidp_session *session, 
unsigned char param)
+static void hidp_process_handshake(struct hidp_session *session,
+   unsigned char param)
 {
BT_DBG(session %p param 0x%02x, session, param);
 
@@ -365,7 +367,8 @@ static inline void hidp_process_handshak
}
 }
 
-static inline void hidp_process_hid_control(struct hidp_session *session, 
unsigned char param)
+static void hidp_process_hid_control(struct hidp_session *session,
+   unsigned char param)
 {
BT_DBG(session %p param 0x%02x, session, param);
 
@@ -379,7 +382,8 @@ static inline void hidp_process_hid_cont
}
 }
 
-static inline void hidp_process_data(struct hidp_session *session, struct 
sk_buff *skb, unsigned char param)
+static void hidp_process_data(struct hidp_session *session, struct sk_buff 
*skb,
+   unsigned char param)
 {
BT_DBG(session %p skb %p len %d param 0x%02x, session, skb, skb-len, 
param);
 
@@ -406,7 +410,8 @@ static inline void hidp_process_data(str
}
 }
 
-static inline void hidp_recv_ctrl_frame(struct hidp_session *session, struct 
sk_buff *skb)
+static void hidp_recv_ctrl_frame(struct hidp_session *session,
+   struct sk_buff *skb)
 {
unsigned char hdr, type, param;
 
@@ -440,7 +445,8 @@ static inline void hidp_recv_ctrl_frame(
kfree_skb(skb);
 }
 
-static inline void hidp_recv_intr_frame(struct hidp_session *session, struct 
sk_buff *skb)
+static void hidp_recv_intr_frame(struct hidp_session *session,
+   struct sk_buff *skb)
 {
unsigned char hdr;
 
@@ -608,7 +614,8 @@ static struct device *hidp_get_device(st
return conn ? conn-dev : NULL;
 }
 
-static inline int hidp_setup_input(struct hidp_session *session, struct 
hidp_connadd_req *req)
+static int hidp_setup_input(struct hidp_session *session,
+   struct hidp_connadd_req *req)
 {
struct input_dev *input = session-input;
int i;
@@ -685,7 +692,8 @@ static void hidp_setup_quirks(struct hid
hid-quirks = hidp_blacklist[n].quirks;
 }
 
-static inline void hidp_setup_hid(struct hidp_session *session, struct 
hidp_connadd_req *req)
+static void hidp_setup_hid(struct hidp_session *session,
+   struct hidp_connadd_req *req)
 {
struct hid_device *hid = session-hid;
struct hid_report *report;
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 1/5] bluetooth: hidp_process_hid_control remove unnecessary parameter dealing

2007-11-21 Thread akpm

From: Dave Young [EMAIL PROTECTED]

According to the bluetooth HID spec v1.0 chapter 7.4.2

This code requests a major state change in a BT-HID device.  A HID_CONTROL
request does not generate a HANDSHAKE response.

A HID_CONTROL packet with a parameter of VIRTUAL_CABLE_UNPLUG is the only
HID_CONTROL packet a device can send to a host.  A host will ignore all other
packets.

So in the hidp_process_hid_control function, we just need to deal with the
UNPLUG packet.

Signed-off-by: Dave Young [EMAIL PROTECTED]
Cc: Marcel Holtmann [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 net/bluetooth/hidp/core.c |   19 +--
 1 file changed, 1 insertion(+), 18 deletions(-)

diff -puN 
net/bluetooth/hidp/core.c~bluetooth-hidp_process_hid_control-remove-unnecessary-parameter-dealing
 net/bluetooth/hidp/core.c
--- 
a/net/bluetooth/hidp/core.c~bluetooth-hidp_process_hid_control-remove-unnecessary-parameter-dealing
+++ a/net/bluetooth/hidp/core.c
@@ -369,30 +369,13 @@ static inline void hidp_process_hid_cont
 {
BT_DBG(session %p param 0x%02x, session, param);
 
-   switch (param) {
-   case HIDP_CTRL_NOP:
-   break;
-
-   case HIDP_CTRL_VIRTUAL_CABLE_UNPLUG:
+   if (param == HIDP_CTRL_VIRTUAL_CABLE_UNPLUG) {
/* Flush the transmit queues */
skb_queue_purge(session-ctrl_transmit);
skb_queue_purge(session-intr_transmit);
 
/* Kill session thread */
atomic_inc(session-terminate);
-   break;
-
-   case HIDP_CTRL_HARD_RESET:
-   case HIDP_CTRL_SOFT_RESET:
-   case HIDP_CTRL_SUSPEND:
-   case HIDP_CTRL_EXIT_SUSPEND:
-   /* FIXME: We have to parse these and return no error */
-   break;
-
-   default:
-   __hidp_send_ctrl_message(session,
-   HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, 
NULL, 0);
-   break;
}
 }
 
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 5/5] bluetooth: blacklist another Broadcom BCM2035 device

2007-11-21 Thread akpm

From: Andy Shevchenko [EMAIL PROTECTED]

This device is recognized as bluetooth, but still does not work.

Signed-off-by: Andy Shevchenko [EMAIL PROTECTED]
Cc: Marcel Holtmann [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 drivers/bluetooth/hci_usb.c |1 +
 1 file changed, 1 insertion(+)

diff -puN 
drivers/bluetooth/hci_usb.c~bluetooth-blacklist-another-broadcom-bcm2035-device 
drivers/bluetooth/hci_usb.c
--- 
a/drivers/bluetooth/hci_usb.c~bluetooth-blacklist-another-broadcom-bcm2035-device
+++ a/drivers/bluetooth/hci_usb.c
@@ -111,6 +111,7 @@ static struct usb_device_id blacklist_id
{ USB_DEVICE(0x0a5c, 0x2033), .driver_info = HCI_IGNORE },
 
/* Broadcom BCM2035 */
+   { USB_DEVICE(0x0a5c, 0x2035), .driver_info = HCI_RESET | 
HCI_WRONG_SCO_MTU },
{ USB_DEVICE(0x0a5c, 0x200a), .driver_info = HCI_RESET | 
HCI_WRONG_SCO_MTU },
{ USB_DEVICE(0x0a5c, 0x2009), .driver_info = HCI_BCM92035 },
 
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 4/5] drivers/bluetooth/btsdio.c: fix double-free

2007-11-21 Thread akpm

From: Adrian Bunk [EMAIL PROTECTED]

This patch fixes a double-free spotted by the Coverity checker.

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 drivers/bluetooth/btsdio.c |4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff -puN drivers/bluetooth/btsdio.c~drivers-bluetooth-btsdioc-fix-double-free 
drivers/bluetooth/btsdio.c
--- a/drivers/bluetooth/btsdio.c~drivers-bluetooth-btsdioc-fix-double-free
+++ a/drivers/bluetooth/btsdio.c
@@ -162,10 +162,8 @@ static int btsdio_rx_packet(struct btsdi
bt_cb(skb)-pkt_type = hdr[3];
 
err = hci_recv_frame(skb);
-   if (err  0) {
-   kfree(skb);
+   if (err  0)
return err;
-   }
 
sdio_writeb(data-func, 0x00, REG_PC_RRT, NULL);
 
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 3/5] drivers/bluetooth/bpa10x.c: fix memleak

2007-11-21 Thread akpm

From: Adrian Bunk [EMAIL PROTECTED]

This patch fixes a memleak spotted by the Coverity checker.

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 drivers/bluetooth/bpa10x.c |1 +
 1 file changed, 1 insertion(+)

diff -puN drivers/bluetooth/bpa10x.c~drivers-bluetooth-bpa10xc-fix-memleak 
drivers/bluetooth/bpa10x.c
--- a/drivers/bluetooth/bpa10x.c~drivers-bluetooth-bpa10xc-fix-memleak
+++ a/drivers/bluetooth/bpa10x.c
@@ -423,6 +423,7 @@ static int bpa10x_send_frame(struct sk_b
break;
 
default:
+   usb_free_urb(urb);
return -EILSEQ;
}
 
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] napi: conditional NAPI in drivers

2007-11-21 Thread Stephen Hemminger

There has been a pattern of bugs in the 2.6.24 conversion
where a driver is broken if NAPI is not configured.

Change all drivers that have a conditional NAPI option so that the
napi_struct member is compiled out when NAPI is not configured; that
way these bugs are caught at compile time.

Compile tested only (but that's the point).

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]

---
 drivers/net/amd8111e.h   |2 ++
 drivers/net/cassini.h|3 ++-
 drivers/net/chelsio/common.h |2 ++
 drivers/net/forcedeth.c  |3 ++-
 drivers/net/gianfar.h|2 ++
 drivers/net/ixgb/ixgb.h  |2 ++
 drivers/net/pcnet32.c|2 ++
 drivers/net/s2io.h   |2 ++
 drivers/net/starfire.c   |2 ++
 drivers/net/ucc_geth.h   |2 ++
 drivers/net/via-rhine.c  |2 ++
 11 files changed, 22 insertions(+), 2 deletions(-)

--- a/drivers/net/amd8111e.h2007-11-16 16:17:20.0 -0800
+++ b/drivers/net/amd8111e.h2007-11-21 10:04:20.0 -0800
@@ -763,7 +763,9 @@ struct amd8111e_priv{
/* Reg memory mapped address */
void __iomem *mmio;
 
+#ifdef CONFIG_AMD8111E_NAPI
struct napi_struct napi;
+#endif
 
spinlock_t lock;/* Guard lock */
unsigned long rx_idx, tx_idx;   /* The next free ring entry */
--- a/drivers/net/cassini.h 2007-11-16 16:17:20.0 -0800
+++ b/drivers/net/cassini.h 2007-11-21 10:07:25.0 -0800
@@ -4280,8 +4280,9 @@ struct cas {
int rx_cur[N_RX_COMP_RINGS], rx_new[N_RX_COMP_RINGS];
int rx_last[N_RX_DESC_RINGS];
 
+#ifdef CONFIG_CASSINI_NAPI
struct napi_struct napi;
-
+#endif
/* Set when chip is actually in operational state
 * (ie. not power managed) */
int hw_running;
--- a/drivers/net/chelsio/common.h  2007-11-16 16:17:20.0 -0800
+++ b/drivers/net/chelsio/common.h  2007-11-21 10:15:09.0 -0800
@@ -278,7 +278,9 @@ struct adapter {
struct peespi *espi;
struct petp   *tp;
 
+#ifdef CONFIG_CHELSIO_T1_NAPI
struct napi_struct napi;
+#endif
struct port_info port[MAX_NPORTS];
struct delayed_work stats_update_task;
struct timer_list stats_update_timer;
--- a/drivers/net/forcedeth.c   2007-11-19 18:56:12.0 -0800
+++ b/drivers/net/forcedeth.c   2007-11-21 10:10:43.0 -0800
@@ -748,8 +748,9 @@ struct fe_priv {
spinlock_t lock;
 
struct net_device *dev;
+#ifdef CONFIG_FORCEDETH_NAPI
struct napi_struct napi;
-
+#endif
/* General data:
 * Locking: spin_lock(np-lock); */
struct nv_ethtool_stats estats;
--- a/drivers/net/gianfar.h 2007-11-16 16:17:20.0 -0800
+++ b/drivers/net/gianfar.h 2007-11-21 10:13:25.0 -0800
@@ -691,7 +691,9 @@ struct gfar_private {
spinlock_t rxlock;
 
struct net_device *dev;
+#ifdef CONFIG_GFAR_NAPI
struct napi_struct napi;
+#endif
 
/* skb array and index */
struct sk_buff ** rx_skbuff;
--- a/drivers/net/ixgb/ixgb.h   2007-11-19 18:56:12.0 -0800
+++ b/drivers/net/ixgb/ixgb.h   2007-11-21 10:15:41.0 -0800
@@ -184,7 +184,9 @@ struct ixgb_adapter {
boolean_t rx_csum;
 
/* OS defined structs */
+#ifdef CONFIG_IXGB_NAPI
struct napi_struct napi;
+#endif
struct net_device *netdev;
struct pci_dev *pdev;
struct net_device_stats net_stats;
--- a/drivers/net/starfire.c2007-11-16 16:17:21.0 -0800
+++ b/drivers/net/starfire.c2007-11-21 10:10:13.0 -0800
@@ -596,7 +596,9 @@ struct netdev_private {
struct tx_done_desc *tx_done_q;
dma_addr_t tx_done_q_dma;
unsigned int tx_done;
+#ifdef CONFIG_ADAPTEC_STARFIRE_NAPI
struct napi_struct napi;
+#endif
struct net_device *dev;
struct net_device_stats stats;
struct pci_dev *pci_dev;
--- a/drivers/net/ucc_geth.h2007-11-16 16:17:21.0 -0800
+++ b/drivers/net/ucc_geth.h2007-11-21 10:14:04.0 -0800
@@ -1185,7 +1185,9 @@ struct ucc_geth_private {
struct ucc_geth_info *ug_info;
struct ucc_fast_private *uccf;
struct net_device *dev;
+#ifdef CONFIG_UGETH_NAPI
struct napi_struct napi;
+#endif
struct ucc_geth *ug_regs;
struct ucc_geth_init_pram *p_init_enet_param_shadow;
struct ucc_geth_exf_global_pram *p_exf_glbl_param;
--- a/drivers/net/via-rhine.c   2007-11-16 16:17:21.0 -0800
+++ b/drivers/net/via-rhine.c   2007-11-21 10:11:14.0 -0800
@@ -390,7 +390,9 @@ struct rhine_private {
struct pci_dev *pdev;
long pioaddr;
struct net_device *dev;
+#ifdef CONFIG_VIA_RHINE_NAPI
struct napi_struct napi;
+#endif
struct net_device_stats stats;
spinlock_t lock;
 
--- a/drivers/net/tulip/tulip.h 2007-11-16 16:17:21.0 -0800
+++ b/drivers/net/tulip/tulip.h 2007-11-21 10:23:38.0 -0800
@@ -353,7 +353,9 @@ struct tulip_private {
int chip_id;
int

[patch 1/8] forcedeth: power down phy when interface is down

2007-11-21 Thread akpm

From: Ed Swierk [EMAIL PROTECTED]

Bring the physical link down when the interface is down by placing the PHY
in power-down state, unless WOL is enabled.  This mirrors the behavior of
other drivers including e1000 and tg3.

Signed-off-by: Ed Swierk [EMAIL PROTECTED]
Cc: Jeff Garzik [EMAIL PROTECTED]
Cc: Ayaz Abdulla [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 drivers/net/forcedeth.c |   12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff -puN 
drivers/net/forcedeth.c~forcedeth-power-down-phy-when-interface-is-down 
drivers/net/forcedeth.c
--- a/drivers/net/forcedeth.c~forcedeth-power-down-phy-when-interface-is-down
+++ a/drivers/net/forcedeth.c
@@ -1312,9 +1312,9 @@ static int phy_init(struct net_device *d
/* some phys clear out pause advertisment on reset, set it back */
mii_rw(dev, np-phyaddr, MII_ADVERTISE, reg);
 
-   /* restart auto negotiation */
+   /* restart auto negotiation, power down phy */
mii_control = mii_rw(dev, np-phyaddr, MII_BMCR, MII_READ);
-   mii_control |= (BMCR_ANRESTART | BMCR_ANENABLE);
+   mii_control |= (BMCR_ANRESTART | BMCR_ANENABLE | BMCR_PDOWN);
if (mii_rw(dev, np-phyaddr, MII_BMCR, mii_control)) {
return PHY_ERROR;
}
@@ -4798,6 +4798,10 @@ static int nv_open(struct net_device *de
 
dprintk(KERN_DEBUG nv_open: begin\n);
 
+   /* power up phy */
+   mii_rw(dev, np-phyaddr, MII_BMCR,
+  mii_rw(dev, np-phyaddr, MII_BMCR, MII_READ)  ~BMCR_PDOWN);
+
/* erase previous misconfiguration */
if (np-driver_data  DEV_HAS_POWER_CNTRL)
nv_mac_reset(dev);
@@ -4980,6 +4984,10 @@ static int nv_close(struct net_device *d
if (np-wolenabled) {
writel(NVREG_PFF_ALWAYS|NVREG_PFF_MYADDR, base + 
NvRegPacketFilterFlags);
nv_start_rx(dev);
+   } else {
+   /* power down phy */
+   mii_rw(dev, np-phyaddr, MII_BMCR,
+  mii_rw(dev, np-phyaddr, MII_BMCR, MII_READ)|BMCR_PDOWN);
}
 
/* FIXME: power down nic */
_
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/5] udp: memory limitation by using udp_mem

2007-11-21 Thread Hideo AOKI

David Miller wrote:
 From: Hideo AOKI [EMAIL PROTECTED]
 Date: Thu, 15 Nov 2007 16:50:14 -0500

 +static inline int __ip_check_max_skb_pages(struct sock *sk, int size)
 +{
 +switch(sk-sk_protocol) {
 +case IPPROTO_UDP:
 +if (atomic_read(sk-sk_prot-memory_allocated) + size
 + sk-sk_prot-sysctl_mem[0])
 +return -ENOBUFS;
 +/* Fall through */  
 +default:
 +break;
 +}
 +return 0;
 +}
 +
snip

 These special case checks are all over the place.

 We don't have tests all over the place to see if a socket is TCP or
 DCCP or SCTP in order to implement memory accounting there, because we
 did it for connection oriented protocols cleanly, seperating things
 via callbacks etc.

 I would like to see the datagram memory accounting work similarly.

Hello,

I'm still thinking about this and focusing on enhancing the above function.
However, I feel difficulty because socket buffer allocation of UDP
sending packet is in IP layer: ip_append_data(). Moreover, the function
is called from several protocols including TCP. This makes setting
callback hard without changing function interface or core data structure.

Then, I would like to know if the following implementation could be
acceptable.

 - Adding sk_datagram_{rw}mem_schedule() as a memory schedule function
   for datagram protocols.

 - Adding sk_wmem_schedule().
   In the function, sk_stream_wmem_schedule() is called if the caller
   socket is stream protocols. Moreover, sk_datagram_wmem_schedule()
   is called if the socket is datagram like this:

   int sk_wmem_schedule(struct sock *sk, int size)
   {
...
switch (sk-sk_type) {
case SOCK_STREAM:
return sk_stream_wmem_schedule(sk, size);
case SOCK_DGRAM:
return sk_datagram_wmem_schedule(sk, size);
default:
return 1;
}
   }

 - In ip_append_data(), sk_wmem_schedule() is called to execute
   memory accounting.

Please let me know if you have any comments about this.

Best regards,
Hideo

-- 
Hitachi Computer Products (America) Inc.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [patch 2/8] forcedeth: fix MAC address detection on network card (regression in 2.6.23)

2007-11-21 Thread Ayaz Abdulla

The solution is to get the OEM to update their BIOS (instead of
integrating this patch) since the MCP61 specs indicate that the MAC
Address should be in correct order from BIOS.

By changing the feature DEV_HAS_CORRECT_MACADDR to all MCP61 boards, it
could cause it to break on other OEM systems who have implemented it
correctly.

Thanks,
Ayaz



-Original Message-
From: [EMAIL PROTECTED] [mailto:[EMAIL PROTECTED] 
Sent: Wednesday, November 21, 2007 3:03 PM
To: [EMAIL PROTECTED]
Cc: netdev@vger.kernel.org; [EMAIL PROTECTED];
[EMAIL PROTECTED]; Ayaz Abdulla; [EMAIL PROTECTED]
Subject: [patch 2/8] forcedeth: fix MAC address detection on network
card (regression in 2.6.23)


From: Michael Pyne [EMAIL PROTECTED]

Partially revert a change to mac address detection introduced to the
forcedeth
driver.  The change was intended to correct mac address detection for
newer
nVidia chipsets where the mac address was stored in reverse order.  One
of
those chipsets appears to still have the mac address in reverse order
(or at
least, it does on my system).

The change that broke mac address detection for my card was commit
ef756b3e56c68a4d76d9d7b9a73fa8f4f739180f forcedeth: mac address
correct

My network card is an nVidia built-in Ethernet card, output from lspci
as
follows (with text and numeric ids):
$ lspci | grep Ethernet
00:07.0 Bridge: nVidia Corporation MCP61 Ethernet (rev a2)
$ lspci -n | grep 07.0
00:07.0 0680: 10de:03ef (rev a2)

The vendor id is, of course, nVidia.  The device id corresponds to the
NVIDIA_NVENET_19 entry.

The included patch fixes the MAC address detection on my system.
Interestingly, the MAC address appears to be in the range reserved for
my
motherboard manufacturer (Gigabyte) and not nVidia.

Signed-off-by: Michael J. Pyne [EMAIL PROTECTED]
Cc: Jeff Garzik [EMAIL PROTECTED]
Cc: Ayaz Abdulla [EMAIL PROTECTED]
Cc: [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 drivers/net/forcedeth.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff -puN
drivers/net/forcedeth.c~forcedeth-fix-mac-address-detection-on-network-c
ard-regression-in-2623 drivers/net/forcedeth.c
---
a/drivers/net/forcedeth.c~forcedeth-fix-mac-address-detection-on-network
-card-regression-in-2623
+++ a/drivers/net/forcedeth.c
@@ -,7 +,7 @@ static struct pci_device_id pci_tbl[] = 
},
{   /* MCP61 Ethernet Controller */
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA,
PCI_DEVICE_ID_NVIDIA_NVENET_19),
-   .driver_data =
DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTR
L|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_E
XTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR,
+   .driver_data =
DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTR
L|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_E
XTENDED|DEV_HAS_MGMT_UNIT,
},
{   /* MCP65 Ethernet Controller */
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA,
PCI_DEVICE_ID_NVIDIA_NVENET_20),
_
---
This email message is for the sole use of the intended recipient(s) and may 
contain
confidential information.  Any unauthorized review, use, disclosure or 
distribution
is prohibited.  If you are not the intended recipient, please contact the 
sender by
reply email and destroy all copies of the original message.
---
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch 2/8] forcedeth: fix MAC address detection on network card (regression in 2.6.23)

2007-11-21 Thread Andrew Morton

On Wed, 21 Nov 2007 15:34:52 -0800
Ayaz Abdulla [EMAIL PROTECTED] wrote:

 The solution is to get the OEM to update their BIOS (instead of
 integrating this patch) since the MCP61 specs indicate that the MAC
 Address should be in correct order from BIOS.
 
 By changing the feature DEV_HAS_CORRECT_MACADDR to all MCP61 boards, it
 could cause it to break on other OEM systems who have implemented it
 correctly.
 

Getting an OEM to fix their BIOS isn't always a simple thing...

Perhaps Michael's change should be enabled by a module parameter for
those who happen to have the busted BIOS?

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch 2/8] forcedeth: fix MAC address detection on network card (regression in 2.6.23)

2007-11-21 Thread Michael Pyne

On Wednesday 21 November 2007, Andrew Morton wrote:
 On Wed, 21 Nov 2007 15:34:52 -0800

 Ayaz Abdulla [EMAIL PROTECTED] wrote:
  The solution is to get the OEM to update their BIOS (instead of
  integrating this patch) since the MCP61 specs indicate that the MAC
  Address should be in correct order from BIOS.
 
  By changing the feature DEV_HAS_CORRECT_MACADDR to all MCP61 boards, it
  could cause it to break on other OEM systems who have implemented it
  correctly.

 Getting an OEM to fix their BIOS isn't always a simple thing...

 Perhaps Michael's change should be enabled by a module parameter for
 those who happen to have the busted BIOS?

I have contacted the motherboard vendor about this a couple of weeks ago per 
Ayaz's request and have received no response.  I've also upgraded to the 
latest firmware for this motherboard and the bug remains.

I think it would be ideal if there were a way to detect broken MCP61's (i.e. 
those with a Gigabyte MAC ID instead of the nVidia one) and only reverse the 
MAC address then.  A module parameter would also work but then I'd need to 
remember to apply it. :)

Regards,
 - Michael Pyne


signature.asc
Description: This is a digitally signed message part.

Re: [RFC/PATCH] SO_NO_CHECK for IPv6

2007-11-21 Thread Jeff Garzik


YOSHIFUJI Hideaki / 吉藤英明 wrote:

In article [EMAIL PROTECTED] (at Wed, 21 Nov 2007 07:45:32 -0500), Jeff Garzik 
[EMAIL PROTECTED] says:


SO_NO_CHECK support for IPv6 appeared to be missing. This is presented,
based on a reading of net/ipv4/udp.c.


Disagree. UDP checksum is mandatory in IPv6.


Ah, you mean that I need to turn off UDP checksum on receive end as well 
in IPv6...  true.


For those interested, I am dealing with a UDP app that already does very 
strong checksumming and encryption, so additional software checksumming 
at the lower layers is quite simply a waste of CPU cycles.  Hardware 
checksumming is fine, as long as it's free.


Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch 2/8] forcedeth: fix MAC address detection on network card (regression in 2.6.23)

2007-11-21 Thread Jesper Juhl

On 22/11/2007, Michael Pyne [EMAIL PROTECTED] wrote:
 On Wednesday 21 November 2007, Andrew Morton wrote:
  On Wed, 21 Nov 2007 15:34:52 -0800
 
  Ayaz Abdulla [EMAIL PROTECTED] wrote:
   The solution is to get the OEM to update their BIOS (instead of
   integrating this patch) since the MCP61 specs indicate that the MAC
   Address should be in correct order from BIOS.
  
   By changing the feature DEV_HAS_CORRECT_MACADDR to all MCP61 boards, it
   could cause it to break on other OEM systems who have implemented it
   correctly.
 
  Getting an OEM to fix their BIOS isn't always a simple thing...
 
  Perhaps Michael's change should be enabled by a module parameter for
  those who happen to have the busted BIOS?

 I have contacted the motherboard vendor about this a couple of weeks ago per
 Ayaz's request and have received no response.  I've also upgraded to the
 latest firmware for this motherboard and the bug remains.

 I think it would be ideal if there were a way to detect broken MCP61's (i.e.
 those with a Gigabyte MAC ID instead of the nVidia one) and only reverse the
 MAC address then.  A module parameter would also work but then I'd need to
 remember to apply it. :)


Hmm, MAC address makeups are not my strong point, but are there no rules
describing the various parts of the address that could perhaps be used
to infer programmatically if the address seems to be reversed or not,
and then use that detection logic for all boards that are known to
potentially have the issue?
A module parameter that overrules the automatic detection (for when it
gets it wrong) would probably also be a good idea.


-- 
Jesper Juhl [EMAIL PROTECTED]
Don't top-post  http://www.catb.org/~esr/jargon/html/T/top-post.html
Plain text mails only, please  http://www.expita.com/nomime.html
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: HTB/HSFC shaping precision

2007-11-21 Thread Ryousei Takano

Hi jamal and denys,

  One message later, thats what i dreamed about :-)
  Subject: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module
  On website they have very good explanation...
  http://www.gridmpi.org/gridtcp.jsp

 That looks interesting - without reading the papers a few questions are
 developing in my brain cells; for example it looks very similar to what
 the chelsio NICs claim to do (which could be a good thing for TCP).
 Whenever i see someone implementing something in hardware, i always get
 flushes of patents.

Thanks for looking at our web page.

PSPacer has quite accurate shaping precision.
The point is that it does not require special hardware like the chelsio NICs.
PSPacer uses a gap packet, whose format is IEEE 802.3x pause frame,
to control the interval between outgoing packets.
As far as I know, it is a unique approach.

Best Regards,
Ryousei Takano
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC/PATCH] SO_NO_CHECK for IPv6

2007-11-21 Thread Herbert Xu

On Wed, Nov 21, 2007 at 07:17:40PM -0500, Jeff Garzik wrote:

 For those interested, I am dealing with a UDP app that already does very 
 strong checksumming and encryption, so additional software checksumming 
 at the lower layers is quite simply a waste of CPU cycles.  Hardware 
 checksumming is fine, as long as its free.

No matter how strong your underlying checksumming is it's not
going to protect the IPv6 header is it :)

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH RFC] [1/9] Core module symbol namespaces code and intro.

2007-11-21 Thread Andi Kleen


There seems to be rough consensus that the kernel currently has too many 
exported symbols. A lot of these exports are generally usable utility 
functions or important driver interfaces; but another large part are functions
intended for only one or two very specific modules for a very specific purpose.
One example is the TCP code. It has most of its internals exported, but 
only for use by tcp_ipv6.c (and now a few more by the TCP/IP congestion 
modules) 
But it doesn't make sense to include these functions, which are exported
for a specific module, in a broader kernel interface.  External modules assume
they can use these functions, but they were never intended for that.

This patch allows exporting symbols only to specific modules by
introducing symbol name spaces. A module name space has a white
list of modules that are allowed to import symbols from it; all others
can't use the symbols.

It adds two new macros: 

MODULE_NAMESPACE_ALLOW(namespace, module);

Allow module to import symbols from namespace. module is the module name without
.ko as displayed by lsmod.  Must be in the same module as the export
(and be duplicated if there are multiple modules exporting symbols
to a namespace).  Multiple allows for the same name space are allowed.

EXPORT_SYMBOL_NS(namespace, symbol);

Export symbol into namespace.  Only modules allowed for the namespace
will be able to use them. EXPORT_SYMBOL_NS implies GPL only
because it is only for internal interfaces.

The name spaces only work for module loading. I didn't find
a nice way to make them work inside the main kernel binary. This means
the name space is not enforced for modules that are built in.

The biggest amount of work is of course still open: to go over all the existing
exports and figure for which ones it makes sense to define a namespace.
I did it for TCP and UDP so far, but the kernel right now has nearly 10k
exports (with some dups) that would need to be checked and turned into
name spaces. I would expect any symbol that is only used by one or two
other modules is a strong candidate for a namespace; in some cases even more
with modules that are tightly coupled.

I am optimistic that in the end we will have a much more manageable 
kernel interface.

Caveats: 

Exports need one long word more memory.

I had to add some alignment magic to the existing EXPORT_SYMBOLs
to get the sections right. Tested on i386/x86-64, but I hope it also
still works on architectures with stricter alignment requirements
like ARM. Any testers for that?

---
 arch/arm/kernel/armksyms.c|2 
 include/asm-generic/vmlinux.lds.h |7 +
 include/linux/module.h|   71 +++
 kernel/module.c   |  137 +++---
 4 files changed, 177 insertions(+), 40 deletions(-)

Index: linux/include/linux/module.h
===
--- linux.orig/include/linux/module.h
+++ linux/include/linux/module.h
@@ -34,6 +34,7 @@ struct kernel_symbol
 {
unsigned long value;
const char *name;
+   const char *namespace;
 };
 
 struct modversion_info
@@ -167,49 +168,80 @@ struct notifier_block;
 #ifdef CONFIG_MODULES
 
 /* Get/put a kernel symbol (calls must be symmetric) */
-void *__symbol_get(const char *symbol);
-void *__symbol_get_gpl(const char *symbol);
+extern void *do_symbol_get(const char *symbol, struct module *caller);
+#define __symbol_get(sym) do_symbol_get(sym, THIS_MODULE)
 #define symbol_get(x) ((typeof(x))(__symbol_get(MODULE_SYMBOL_PREFIX #x)))
 
+struct module_ns {
+   char *name;
+   char *allowed;
+};
+
+#define NS_SEPARATOR .
+
+/*
+ * Allow module MODULE to reference namespace NS.
+ * MODULE is just the base module name with suffix or path.
+ * This must be declared in the module (or main kernel) as where the
+ * symbols are defined. When multiple modules export symbols from
+ * a single namespace all modules need to contain a full set
+ * of MODULE_NAMESPACE_ALLOWs.
+ */
+#define MODULE_NAMESPACE_ALLOW(ns, module) \
+   static const struct module_ns __knamespace_##module##_##_##ns \
+   asm(__knamespace_ #module NS_SEPARATOR #ns)   \
+   __attribute_used__  \
+   __attribute__((section(__knamespace), unused))\
+   = { #ns,  #module }
+
 #ifndef __GENKSYMS__
 #ifdef CONFIG_MODVERSIONS
 /* Mark the CRC weak since genksyms apparently decides not to
  * generate a checksums for some symbols */
-#define __CRC_SYMBOL(sym, sec) \
+#define __CRC_SYMBOL(sym, sec, post, post2)\
extern void *__crc_##sym __attribute__((weak)); \
-   static const unsigned long __kcrctab_##sym  \
+   static const unsigned long __kcrctab_##sym##post\
+   asm(__kcrctab_ #sym post2)\
__attribute_used__  \

[PATCH RFC] [2/9] Fix duplicate symbol check to also check future gpl and unused symbols

2007-11-21 Thread Andi Kleen


This seems to have been forgotten earlier. Right now it was possible
for a normal symbol to override a future gpl symbol and similar.
I restructured the code a bit to avoid too much duplicated code.

---
 kernel/module.c |   45 -
 1 file changed, 24 insertions(+), 21 deletions(-)

Index: linux/kernel/module.c
===
--- linux.orig/kernel/module.c
+++ linux/kernel/module.c
@@ -1430,33 +1430,36 @@ EXPORT_SYMBOL_GPL(do_symbol_get);
  * Ensure that an exported symbol [global namespace] does not already exist
  * in the kernel or in some other module's exported symbol table.
  */
-static int verify_export_symbols(struct module *mod)
+
+static int check_duplicate(const struct kernel_symbol *syms, int num, struct 
module *owner)
 {
-   const char *name = NULL;
-   unsigned long i, ret = 0;
-   struct module *owner;
+   int i;
const unsigned long *crc;
 
-   for (i = 0; i  mod-num_syms; i++)
-   if (find_symbol(mod-syms[i].name, owner, crc, 1, mod)) {
-   name = mod-syms[i].name;
-   ret = -ENOEXEC;
-   goto dup;
-   }
-
-   for (i = 0; i  mod-num_gpl_syms; i++)
-   if (find_symbol(mod-gpl_syms[i].name, owner, crc, 1, mod)) {
-   name = mod-gpl_syms[i].name;
-   ret = -ENOEXEC;
-   goto dup;
+   for (i = 0; i  num; i++)
+   if (find_symbol(syms[i].name, owner, crc, 1, owner)) {
+   printk(KERN_ERR %s: exports duplicate symbol %s (owned 
by %s)\n,
+   owner-name, syms[i].name, module_name(owner));
+   return -ENOEXEC;
}
+   return 0;
+}
 
-dup:
+static int verify_export_symbols(struct module *mod)
+{
+   int ret = check_duplicate(mod-syms, mod-num_syms, mod);
if (ret)
-   printk(KERN_ERR %s: exports duplicate symbol %s (owned by 
%s)\n,
-   mod-name, name, module_name(owner));
-
-   return ret;
+   return ret;
+   ret = check_duplicate(mod-gpl_syms, mod-num_gpl_syms, mod);
+   if (ret)
+   return ret;
+   ret = check_duplicate(mod-unused_syms, mod-num_unused_syms, mod);
+   if (ret)
+   return ret;
+   ret = check_duplicate(mod-unused_gpl_syms, mod-num_unused_gpl_syms, 
mod);
+   if (ret)
+   return ret;
+   return check_duplicate(mod-gpl_future_syms, mod-num_gpl_future_syms, 
mod);
 }
 
 /* Change all symbols so that sh_value encodes the pointer directly. */
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH RFC] [4/9] modpost: Fix format string warnings

2007-11-21 Thread Andi Kleen


Fix wrong format strings in modpost exposed by the previous patch.
Including one missing argument -- some random data was printed instead.

---
 scripts/mod/modpost.c |7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

Index: linux/scripts/mod/modpost.c
===
--- linux.orig/scripts/mod/modpost.c
+++ linux/scripts/mod/modpost.c
@@ -388,7 +388,7 @@ static int parse_elf(struct elf_info *in
 
/* Check if file offset is correct */
if (hdr-e_shoff  info-size) {
-   fatal(section header offset=%u in file '%s' is bigger then 
filesize=%lu\n, hdr-e_shoff, filename, info-size);
+   fatal(section header offset=%lu in file '%s' is bigger then 
filesize=%lu\n, (unsigned long)hdr-e_shoff, filename, info-size);
return 0;
}
 
@@ -409,7 +409,7 @@ static int parse_elf(struct elf_info *in
const char *secname;
 
if (sechdrs[i].sh_offset  info-size) {
-   fatal(%s is truncated. sechdrs[i].sh_offset=%u  
sizeof(*hrd)=%ul\n, filename, (unsigned int)sechdrs[i].sh_offset, 
sizeof(*hdr));
+   fatal(%s is truncated. sechdrs[i].sh_offset=%lu  
sizeof(*hrd)=%lu\n, filename, (unsigned long)sechdrs[i].sh_offset, 
sizeof(*hdr));
return 0;
}
secname = secstrings + sechdrs[i].sh_name;
@@ -907,7 +907,8 @@ static void warn_sec_mismatch(const char
 before '%s' (at offset -0x%llx)\n,
 modname, fromsec, (unsigned long long)r.r_offset,
 secname, refsymname,
-elf-strtab + after-st_name);
+elf-strtab + after-st_name,
+(unsigned long long)r.r_offset);
} else {
warn(%s(%s+0x%llx): Section mismatch: reference to %s:%s\n,
 modname, fromsec, (unsigned long long)r.r_offset,
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH RFC] [5/9] modpost: Fix a buffer overflow in modpost

2007-11-21 Thread Andi Kleen


When passing a file name > 1k the stack could be overflowed.
Not really a security issue, but still better plugged.


---
 scripts/mod/modpost.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

Index: linux/scripts/mod/modpost.c
===
--- linux.orig/scripts/mod/modpost.c
+++ linux/scripts/mod/modpost.c
@@ -1656,7 +1656,6 @@ int main(int argc, char **argv)
 {
struct module *mod;
struct buffer buf = { };
-   char fname[SZ];
char *kernel_read = NULL, *module_read = NULL;
char *dump_write = NULL;
int opt;
@@ -1709,6 +1708,8 @@ int main(int argc, char **argv)
err = 0;
 
for (mod = modules; mod; mod = mod-next) {
+   char fname[strlen(mod-name) + 10];
+
if (mod-skip)
continue;
 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH RFC] [6/9] Implement namespace checking in modpost

2007-11-21 Thread Andi Kleen


This checks the namespaces at build time in modpost

---
 scripts/mod/modpost.c |  344 ++
 1 file changed, 317 insertions(+), 27 deletions(-)

Index: linux/scripts/mod/modpost.c
===
--- linux.orig/scripts/mod/modpost.c
+++ linux/scripts/mod/modpost.c
@@ -1,8 +1,9 @@
-/* Postprocess module symbol versions
+/* Postprocess module symbol versions and do various other module checks.
  *
  * Copyright 2003   Kai Germaschewski
  * Copyright 2002-2004  Rusty Russell, IBM Corporation
  * Copyright 2006   Sam Ravnborg
+ * Copyright 2007  Andi Kleen, SUSE Labs (changes licensed GPLv2 only)
  * Based in part on module-init-tools/depmod.c,file2alias
  *
  * This software may be used and distributed according to the terms
@@ -12,9 +13,13 @@
  */
 
 #include ctype.h
+#include assert.h
 #include modpost.h
 #include ../../include/linux/license.h
 
+#define NS_SEPARATOR '.'
+#define NS_SEPARATOR_STRING .
+
 /* Are we using CONFIG_MODVERSIONS? */
 int modversions = 0;
 /* Warn about undefined symbols? (do so if we have vmlinux) */
@@ -27,6 +32,9 @@ static int external_module = 0;
 static int vmlinux_section_warnings = 1;
 /* Only warn about unresolved symbols */
 static int warn_unresolved = 0;
+/* Fixing those would cause too many ifdefs -- off by default. */
+static int warn_missing_modules = 0;
+
 /* How a symbol is exported */
 enum export {
export_plain,  export_unused, export_gpl,
@@ -105,19 +113,43 @@ static struct module *find_module(char *
return mod;
 }
 
-static struct module *new_module(char *modname)
+static const char *basename(const char *s)
+{
+   char *p = strrchr(s, '/');
+   if (p)
+   return p + 1;
+   return s;
+}
+
+static struct module *find_module_base(char *modname)
 {
struct module *mod;
-   char *p, *s;
 
-   mod = NOFAIL(malloc(sizeof(*mod)));
-   memset(mod, 0, sizeof(*mod));
-   p = NOFAIL(strdup(modname));
+   for (mod = modules; mod; mod = mod-next) {
+   if (strcmp(basename(mod-name), modname) == 0)
+   break;
+   }
+   return mod;
+}
 
+static void strip_o(char *p)
+{
+   char *s;
/* strip trailing .o */
if ((s = strrchr(p, '.')) != NULL)
if (strcmp(s, .o) == 0)
*s = '\0';
+}
+
+static struct module *new_module(char *modname)
+{
+   struct module *mod;
+   char *p;
+
+   mod = NOFAIL(malloc(sizeof(*mod)));
+   memset(mod, 0, sizeof(*mod));
+   p = NOFAIL(strdup(modname));
+   strip_o(p);
 
/* add to list */
mod-name = p;
@@ -132,10 +164,12 @@ static struct module *new_module(char *m
  * struct symbol is also used for lists of unresolved symbols */
 
 #define SYMBOL_HASH_SIZE 1024
+#define NSALLOW_HASH_SIZE 64
 
 struct symbol {
struct symbol *next;
struct module *module;
+   const char *namespace;
unsigned int crc;
int crc_valid;
unsigned int weak:1;
@@ -147,10 +181,19 @@ struct symbol {
char name[0];
 };
 
+struct nsallow {
+   struct nsallow *next;
+   struct module *mod;
+   struct module *orig;
+   int ref;
+   char name[0];
+};
+
 static struct symbol *symbolhash[SYMBOL_HASH_SIZE];
+static struct nsallow *nsallowhash[NSALLOW_HASH_SIZE];
 
 /* This is based on the hash agorithm from gdbm, via tdb */
-static inline unsigned int tdb_hash(const char *name)
+static unsigned int tdb_hash(const char *name)
 {
unsigned value; /* Used to compute the hash value.  */
unsigned   i;   /* Used to cycle through random values. */
@@ -192,21 +235,67 @@ static struct symbol *new_symbol(const c
return new;
 }
 
-static struct symbol *find_symbol(const char *name)
+static struct symbol *find_symbol(const char *name, const char *ns)
 {
-   struct symbol *s;
+   struct symbol *s, *match;
 
/* For our purposes, .foo matches foo.  PPC64 needs this. */
if (name[0] == '.')
name++;
 
+   match = NULL;
for (s = symbolhash[tdb_hash(name) % SYMBOL_HASH_SIZE]; s; s=s-next) {
+   if (strcmp(s-name, name) == 0) {
+   match = s;
+   if (ns  s-namespace  strcmp(s-namespace, ns))
+   continue;
+   return s;
+   }
+   }
+   return ns ? NULL : match;
+}
+
+static struct nsallow *find_nsallow(const char *name, struct module *mod)
+{
+   struct nsallow *s;
+
+   for (s = nsallowhash[tdb_hash(name)%NSALLOW_HASH_SIZE]; s; s=s-next) {
+   if (strcmp(s-name, name) == 0  s-mod == mod)
+   return s;
+   }
+   return NULL;
+}
+
+static struct nsallow *find_nsallow_name(const char *name)
+{
+   struct nsallow *s;
+
+   for (s = nsallowhash[tdb_hash(name)%NSALLOW_HASH_SIZE]; s;

[PATCH RFC] [3/9] modpost: Declare the modpost error functions as printf like

2007-11-21 Thread Andi Kleen


This way gcc can warn for wrong format strings

---
 scripts/mod/modpost.c |8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

Index: linux/scripts/mod/modpost.c
===
--- linux.orig/scripts/mod/modpost.c
+++ linux/scripts/mod/modpost.c
@@ -33,7 +33,9 @@ enum export {
export_unused_gpl, export_gpl_future, export_unknown
 };
 
-void fatal(const char *fmt, ...)
+#define PRINTF __attribute__ ((format (printf, 1, 2)))
+
+PRINTF void fatal(const char *fmt, ...)
 {
va_list arglist;
 
@@ -46,7 +48,7 @@ void fatal(const char *fmt, ...)
exit(1);
 }
 
-void warn(const char *fmt, ...)
+PRINTF void warn(const char *fmt, ...)
 {
va_list arglist;
 
@@ -57,7 +59,7 @@ void warn(const char *fmt, ...)
va_end(arglist);
 }
 
-void merror(const char *fmt, ...)
+PRINTF void merror(const char *fmt, ...)
 {
va_list arglist;
 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH RFC] [9/9] Add a inet namespace

2007-11-21 Thread Andi Kleen


Shared by IP, IPv6, DCCP, UDPLITE, SCTP. 

The symbols used by tunnel modules weren't put into any name space
because there are quite a lot of them.

---
 net/core/fib_rules.c|9 --
 net/ipv4/af_inet.c  |   52 
 net/ipv4/arp.c  |1 
 net/ipv4/icmp.c |   10 +++
 net/ipv4/inet_connection_sock.c |   40 +++---
 net/ipv4/inet_diag.c|4 +--
 net/ipv4/inet_hashtables.c  |8 +++---
 net/ipv4/inet_timewait_sock.c   |   12 -
 net/ipv4/ip_input.c |2 -
 net/ipv4/ip_output.c|7 +++--
 net/ipv4/ip_sockglue.c  |   10 +++
 11 files changed, 86 insertions(+), 69 deletions(-)

Index: linux/net/ipv4/af_inet.c
===
--- linux.orig/net/ipv4/af_inet.c
+++ linux/net/ipv4/af_inet.c
@@ -218,7 +218,7 @@ out:
 }
 
 u32 inet_ehash_secret __read_mostly;
-EXPORT_SYMBOL(inet_ehash_secret);
+EXPORT_SYMBOL_NS(inet, inet_ehash_secret);
 
 /*
  * inet_ehash_secret must be set exactly once
@@ -235,7 +235,7 @@ void build_ehash_secret(void)
inet_ehash_secret = rnd;
spin_unlock_bh(inetsw_lock);
 }
-EXPORT_SYMBOL(build_ehash_secret);
+EXPORT_SYMBOL_NS(inet, build_ehash_secret);
 
 /*
  * Create an inet socket.
@@ -1127,7 +1127,7 @@ int inet_sk_rebuild_header(struct sock *
return err;
 }
 
-EXPORT_SYMBOL(inet_sk_rebuild_header);
+EXPORT_SYMBOL_NS(inet,inet_sk_rebuild_header);
 
 static int inet_gso_send_check(struct sk_buff *skb)
 {
@@ -1235,6 +1235,8 @@ unsigned long snmp_fold_field(void *mib[
}
return res;
 }
+/* AK: Not in inet namespace because they're a generic facility. Probably
+   should be in another file though. */
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
 int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
@@ -1499,20 +1501,30 @@ static int __init ipv4_proc_init(void)
 
 MODULE_ALIAS_NETPROTO(PF_INET);
 
-EXPORT_SYMBOL(inet_accept);
-EXPORT_SYMBOL(inet_bind);
-EXPORT_SYMBOL(inet_dgram_connect);
-EXPORT_SYMBOL(inet_dgram_ops);
-EXPORT_SYMBOL(inet_getname);
-EXPORT_SYMBOL(inet_ioctl);
-EXPORT_SYMBOL(inet_listen);
-EXPORT_SYMBOL(inet_register_protosw);
-EXPORT_SYMBOL(inet_release);
-EXPORT_SYMBOL(inet_sendmsg);
-EXPORT_SYMBOL(inet_shutdown);
-EXPORT_SYMBOL(inet_sock_destruct);
-EXPORT_SYMBOL(inet_stream_connect);
-EXPORT_SYMBOL(inet_stream_ops);
-EXPORT_SYMBOL(inet_unregister_protosw);
-EXPORT_SYMBOL(net_statistics);
-EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
+MODULE_NAMESPACE_ALLOW(inet, ipv6);
+MODULE_NAMESPACE_ALLOW(inet, udplite);
+MODULE_NAMESPACE_ALLOW(inet, dccp_ipv6);
+MODULE_NAMESPACE_ALLOW(inet, dccp_ipv4);
+MODULE_NAMESPACE_ALLOW(inet, dccp);
+MODULE_NAMESPACE_ALLOW(inet, sctp);
+
+/* RED-PEN: would be better to fix wanrouter */
+MODULE_NAMESPACE_ALLOW(inet, wanrouter);
+
+EXPORT_SYMBOL_NS(inet,inet_accept);
+EXPORT_SYMBOL_NS(inet,inet_bind);
+EXPORT_SYMBOL_NS(inet,inet_dgram_connect);
+EXPORT_SYMBOL_NS(inet,inet_dgram_ops);
+EXPORT_SYMBOL_NS(inet,inet_getname);
+EXPORT_SYMBOL_NS(inet,inet_ioctl);
+EXPORT_SYMBOL_NS(inet,inet_listen);
+EXPORT_SYMBOL_NS(inet,inet_register_protosw);
+EXPORT_SYMBOL_NS(inet,inet_release);
+EXPORT_SYMBOL_NS(inet,inet_sendmsg);
+EXPORT_SYMBOL_NS(inet,inet_shutdown);
+EXPORT_SYMBOL_NS(inet,inet_sock_destruct);
+EXPORT_SYMBOL_NS(inet,inet_stream_connect);
+EXPORT_SYMBOL_NS(inet,inet_stream_ops);
+EXPORT_SYMBOL_NS(inet,inet_unregister_protosw);
+EXPORT_SYMBOL_NS(inet,net_statistics);
+EXPORT_SYMBOL_NS(inet,sysctl_ip_nonlocal_bind);
Index: linux/net/ipv4/arp.c
===
--- linux.orig/net/ipv4/arp.c
+++ linux/net/ipv4/arp.c
@@ -1406,6 +1406,7 @@ static int __init arp_proc_init(void)
 
 #endif /* CONFIG_PROC_FS */
 
+/* No namespace because those are used by various drivers */
 EXPORT_SYMBOL(arp_broken_ops);
 EXPORT_SYMBOL(arp_find);
 EXPORT_SYMBOL(arp_create);
Index: linux/net/ipv4/icmp.c
===
--- linux.orig/net/ipv4/icmp.c
+++ linux/net/ipv4/icmp.c
@@ -1101,7 +1101,7 @@ void __init icmp_init(struct net_proto_f
}
 }
 
-EXPORT_SYMBOL(icmp_err_convert);
-EXPORT_SYMBOL(icmp_send);
-EXPORT_SYMBOL(icmp_statistics);
-EXPORT_SYMBOL(xrlim_allow);
+EXPORT_SYMBOL_NS(inet, icmp_err_convert);
+EXPORT_SYMBOL_NS(inet, icmp_send);
+EXPORT_SYMBOL_NS(inet, icmp_statistics);
+EXPORT_SYMBOL_NS(inet, xrlim_allow);
Index: linux/net/ipv4/inet_connection_sock.c
===
--- linux.orig/net/ipv4/inet_connection_sock.c
+++ linux/net/ipv4/inet_connection_sock.c
@@ -26,7 +26,7 @@
 
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = inet_csk BUG: unknown timer value\n;
-EXPORT_SYMBOL(inet_csk_timer_bug_msg);
+EXPORT_SYMBOL_NS(inet, inet_csk_timer_bug_msg);
 #endif
 
 /*
@@ -73,7 +73,7 @@ int

[PATCH RFC] [8/9] Put UDP exports into a namespace

2007-11-21 Thread Andi Kleen


The UDP exports are only used by UDPv6 and UDP lite. They are internal functions
not supposed to be used by anybody else. So turn them into a name space that 
only allows those.

---
 net/ipv4/udp.c |   27 +++
 net/ipv4/udplite.c |6 +++---
 2 files changed, 18 insertions(+), 15 deletions(-)

Index: linux/net/ipv4/udp.c
===
--- linux.orig/net/ipv4/udp.c
+++ linux/net/ipv4/udp.c
@@ -105,6 +105,9 @@
 #include net/xfrm.h
 #include udp_impl.h
 
+MODULE_NAMESPACE_ALLOW(udp, udplite);
+MODULE_NAMESPACE_ALLOW(udp, ipv6);
+
 /*
  * Snmp MIB for the UDP layer
  */
@@ -1641,18 +1644,18 @@ void udp4_proc_exit(void)
 }
 #endif /* CONFIG_PROC_FS */
 
-EXPORT_SYMBOL(udp_disconnect);
-EXPORT_SYMBOL(udp_hash);
-EXPORT_SYMBOL(udp_hash_lock);
-EXPORT_SYMBOL(udp_ioctl);
-EXPORT_SYMBOL(udp_get_port);
-EXPORT_SYMBOL(udp_prot);
-EXPORT_SYMBOL(udp_sendmsg);
-EXPORT_SYMBOL(udp_lib_getsockopt);
-EXPORT_SYMBOL(udp_lib_setsockopt);
-EXPORT_SYMBOL(udp_poll);
+EXPORT_SYMBOL_NS(udp, udp_disconnect);
+EXPORT_SYMBOL_NS(udp, udp_hash);
+EXPORT_SYMBOL_NS(udp, udp_hash_lock);
+EXPORT_SYMBOL_NS(udp, udp_ioctl);
+EXPORT_SYMBOL_NS(udp, udp_get_port);
+EXPORT_SYMBOL_NS(udp, udp_prot);
+EXPORT_SYMBOL_NS(udp, udp_sendmsg);
+EXPORT_SYMBOL_NS(udp, udp_lib_getsockopt);
+EXPORT_SYMBOL_NS(udp, udp_lib_setsockopt);
+EXPORT_SYMBOL_NS(udp, udp_poll);
 
 #ifdef CONFIG_PROC_FS
-EXPORT_SYMBOL(udp_proc_register);
-EXPORT_SYMBOL(udp_proc_unregister);
+EXPORT_SYMBOL_NS(udp, udp_proc_register);
+EXPORT_SYMBOL_NS(udp, udp_proc_unregister);
 #endif
Index: linux/net/ipv4/udplite.c
===
--- linux.orig/net/ipv4/udplite.c
+++ linux/net/ipv4/udplite.c
@@ -113,6 +113,6 @@ out_register_err:
printk(KERN_CRIT %s: Cannot add UDP-Lite protocol.\n, __FUNCTION__);
 }
 
-EXPORT_SYMBOL(udplite_hash);
-EXPORT_SYMBOL(udplite_prot);
-EXPORT_SYMBOL(udplite_get_port);
+EXPORT_SYMBOL_NS(udp, udplite_hash);
+EXPORT_SYMBOL_NS(udp, udplite_prot);
+EXPORT_SYMBOL_NS(udp, udplite_get_port);
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH RFC] [7/9] Convert TCP exports into namespaces

2007-11-21 Thread Andi Kleen


I defined two namespaces: "tcp" for TCP internals, which are only used by
tcp_ipv6.ko, and "tcpcong" for exports used by the TCP congestion modules.

No need to export any TCP internals to anybody else. So express this in a 
namespace.

I admit I'm not 100% sure tcpcong makes sense -- there might be a legitimate
need to have external out of tree congestion modules. They seem nearly like
drivers, but only nearly. If that was deemed the case it would be possible to 
remove tcpcong again to allow everybody to access this.

This implicitly turns all exports into GPL only, but that won't matter
because all modules allowed to import TCP functions are GPLed.

---
 net/ipv4/tcp.c   |   71 +++
 net/ipv4/tcp_cong.c  |   14 -
 net/ipv4/tcp_input.c |   12 +++
 net/ipv4/tcp_ipv4.c  |   38 -
 net/ipv4/tcp_minisocks.c |   12 +++
 net/ipv4/tcp_output.c|   12 +++
 net/ipv4/tcp_timer.c |2 -
 7 files changed, 87 insertions(+), 74 deletions(-)

Index: linux/net/ipv4/tcp.c
===
--- linux.orig/net/ipv4/tcp.c
+++ linux/net/ipv4/tcp.c
@@ -275,21 +275,21 @@ DEFINE_SNMP_STAT(struct tcp_mib, tcp_sta
 
 atomic_t tcp_orphan_count = ATOMIC_INIT(0);
 
-EXPORT_SYMBOL_GPL(tcp_orphan_count);
+EXPORT_SYMBOL_NS(tcp, tcp_orphan_count);
 
 int sysctl_tcp_mem[3] __read_mostly;
 int sysctl_tcp_wmem[3] __read_mostly;
 int sysctl_tcp_rmem[3] __read_mostly;
 
-EXPORT_SYMBOL(sysctl_tcp_mem);
-EXPORT_SYMBOL(sysctl_tcp_rmem);
-EXPORT_SYMBOL(sysctl_tcp_wmem);
+EXPORT_SYMBOL_NS(tcp, sysctl_tcp_mem);
+EXPORT_SYMBOL_NS(tcp, sysctl_tcp_rmem);
+EXPORT_SYMBOL_NS(tcp, sysctl_tcp_wmem);
 
 atomic_t tcp_memory_allocated; /* Current allocated memory. */
 atomic_t tcp_sockets_allocated;/* Current number of TCP sockets. */
 
-EXPORT_SYMBOL(tcp_memory_allocated);
-EXPORT_SYMBOL(tcp_sockets_allocated);
+EXPORT_SYMBOL_NS(tcp, tcp_memory_allocated);
+EXPORT_SYMBOL_NS(tcp, tcp_sockets_allocated);
 
 /*
  * Pressure flag: try to collapse.
@@ -299,7 +299,7 @@ EXPORT_SYMBOL(tcp_sockets_allocated);
  */
 int tcp_memory_pressure __read_mostly;
 
-EXPORT_SYMBOL(tcp_memory_pressure);
+EXPORT_SYMBOL_NS(tcp, tcp_memory_pressure);
 
 void tcp_enter_memory_pressure(void)
 {
@@ -309,7 +309,7 @@ void tcp_enter_memory_pressure(void)
}
 }
 
-EXPORT_SYMBOL(tcp_enter_memory_pressure);
+EXPORT_SYMBOL_NS(tcp, tcp_enter_memory_pressure);
 
 /*
  * Wait for a TCP event.
@@ -1995,7 +1995,7 @@ int compat_tcp_setsockopt(struct sock *s
return do_tcp_setsockopt(sk, level, optname, optval, optlen);
 }
 
-EXPORT_SYMBOL(compat_tcp_setsockopt);
+EXPORT_SYMBOL_NS(tcp, compat_tcp_setsockopt);
 #endif
 
 /* Return information about state of tcp endpoint in API format. */
@@ -2061,7 +2061,7 @@ void tcp_get_info(struct sock *sk, struc
info-tcpi_total_retrans = tp-total_retrans;
 }
 
-EXPORT_SYMBOL_GPL(tcp_get_info);
+EXPORT_SYMBOL_NS(tcp, tcp_get_info);
 
 static int do_tcp_getsockopt(struct sock *sk, int level,
int optname, char __user *optval, int __user *optlen)
@@ -2174,7 +2174,7 @@ int compat_tcp_getsockopt(struct sock *s
return do_tcp_getsockopt(sk, level, optname, optval, optlen);
 }
 
-EXPORT_SYMBOL(compat_tcp_getsockopt);
+EXPORT_SYMBOL_NS(tcp, compat_tcp_getsockopt);
 #endif
 
 struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
@@ -2262,7 +2262,7 @@ struct sk_buff *tcp_tso_segment(struct s
 out:
return segs;
 }
-EXPORT_SYMBOL(tcp_tso_segment);
+EXPORT_SYMBOL_NS(tcp, tcp_tso_segment);
 
 #ifdef CONFIG_TCP_MD5SIG
 static unsigned long tcp_md5sig_users;
@@ -2298,7 +2298,7 @@ void tcp_free_md5sig_pool(void)
__tcp_free_md5sig_pool(pool);
 }
 
-EXPORT_SYMBOL(tcp_free_md5sig_pool);
+EXPORT_SYMBOL_NS(tcp, tcp_free_md5sig_pool);
 
 static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void)
 {
@@ -2371,7 +2371,7 @@ retry:
return pool;
 }
 
-EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
+EXPORT_SYMBOL_NS(tcp, tcp_alloc_md5sig_pool);
 
 struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu)
 {
@@ -2384,14 +2384,14 @@ struct tcp_md5sig_pool *__tcp_get_md5sig
return (p ? *per_cpu_ptr(p, cpu) : NULL);
 }
 
-EXPORT_SYMBOL(__tcp_get_md5sig_pool);
+EXPORT_SYMBOL_NS(tcp, __tcp_get_md5sig_pool);
 
 void __tcp_put_md5sig_pool(void)
 {
tcp_free_md5sig_pool();
 }
 
-EXPORT_SYMBOL(__tcp_put_md5sig_pool);
+EXPORT_SYMBOL_NS(tcp, __tcp_put_md5sig_pool);
 #endif
 
 void tcp_done(struct sock *sk)
@@ -2409,7 +2409,7 @@ void tcp_done(struct sock *sk)
else
inet_csk_destroy_sock(sk);
 }
-EXPORT_SYMBOL_GPL(tcp_done);
+EXPORT_SYMBOL_NS(tcp, tcp_done);
 
 extern void __skb_cb_too_small_for_tcp(int, int);
 extern struct tcp_congestion_ops tcp_reno;
@@ -2524,15 +2524,28 @@ void __init tcp_init(void)
tcp_register_congestion_control(tcp_reno);
 }
 
-EXPORT_SYMBOL(tcp_close);
-EXPORT_SYMBOL(tcp_disconnect);

Re: [RFC/PATCH] SO_NO_CHECK for IPv6

2007-11-21 Thread YOSHIFUJI Hideaki / 吉藤英明

In article [EMAIL PROTECTED] (at Thu, 22 Nov 2007 10:34:03 +0800), Herbert Xu 
[EMAIL PROTECTED] says:

 On Wed, Nov 21, 2007 at 07:17:40PM -0500, Jeff Garzik wrote:
 
  For those interested, I am dealing with a UDP app that already does very 
  strong checksumming and encryption, so additional software checksumming 
  at the lower layers is quite simply a waste of CPU cycles.  Hardware 
  checksumming is fine, as long as its free.
 
 No matter how strong your underlying checksumming is it's not
 going to protect the IPv6 header is it :)

In that sense, we should use AH.

--yoshfuji
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module

2007-11-21 Thread Ryousei Takano

Hi jamal,

 Good stuff.
 I have not read your paper - There are NICs out there (chelsio comes to
 mind) which claim to do pacing and have shown impressive numbers with
 TCP. Is your approach similar? Are there patents involved by some of
 these hardware vendors? (It would not be suprising if they exist).
 
As far as I know, no. (I do not have the details of the Chelsio NICs.)
Pacing is a general idea, but our implementation approach is unique.
PSPacer makes bursty traffic which is often generated by TCP smooth
without any special hardware.

 The advantage with NICs is they have very good control of the timing
 (clock granularity being extremely important in cases like this) - what
 were your measurements based on i.e what clock source did you use on
 Linux?

The key idea of PSPacer is to determine transmission timing of packets 
by the number of bytes transferred. If packets are transferred back to 
back, the timing a packet is sent can be determined by the number of 
bytes sent before the packet. PSPacer fills the gaps between time 
aligned real packets (the packets which are sent by user program) by 
gap packets. The real packets and gap packets are sent back to back, 
and thus the timing of transmission of each real packet can be precisely 
controlled by adjusting the gap packet size. As the gap packets, the IEEE 
802.3x PAUSE frames are used. PAUSE frames are discarded at a switch 
input port, and only real packets go through the switch keeping the 
original intervals. 

In the past, some software-based pacing schemes have been proposed.
These schemes use timer interrupt based packet transmission timing control.
Therefore, to achieve precise pacing, they require the operating system
to maintain a high resolution timer, which could incur a large overhead.

 Also, the idea of using a PAUSE frame to add gaps is interesting, but
 you should note that in linux a qdisc may be attached to any network
 device and this for example maybe a PPP device etc. What would you use
 for gaps in that case? 


 I apologize if the answers are in your papers - i just glossed over.
 
 cheers,
 jamal 
 
Best regards,
Ryousei Takano
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH RFC] [1/9] Core module symbol namespaces code and intro.

2007-11-21 Thread Arjan van de Ven

On Thu, 22 Nov 2007 03:43:06 +0100 (CET)
Andi Kleen [EMAIL PROTECTED] wrote:

 
 There seems to be rough consensus that the kernel currently has too
 many exported symbols. A lot of these exports are generally usable
 utility functions or important driver interfaces; but another large
 part are functions intended by only one or two very specific modules
 for a very specific purpose. One example is the TCP code. It has most
 of its internals exported, but only for use by tcp_ipv6.c (and now a
 few more by the TCP/IP congestion modules) But it doesn't make sense
 to include these exported for a specific module functions into a
 broader kernel interface.   External modules assume they can use
 these functions, but they were never intended for that.
 
 This patch allows to export symbols only for specific modules by 
 introducing symbol name spaces. A module name space has a white
 list of modules that are allowed to import symbols for it; all others
 can't use the symbols.
 
 It adds two new macros: 
 
 MODULE_NAMESPACE_ALLOW(namespace, module);
 
 Allow module to import symbols from namespace. module is the module
 name without .ko as displayed by lsmod.  Must be in the same module
 as the export (and be duplicated if there are multiple modules
 exporting symbols to a namespace).  Multiple allows for the same name
 space are allowed.
 
 EXPORT_SYMBOL_NS(namespace, symbol);
 

Hi,

I like this concept in general; I have one minor comment; right now
your namespace argument is like

EXPORT_SYMBOL_NS(foo, some_symbol);

from a language-like pov I kinda wonder if it's nicer to do

EXPORT_SYMBOL_NS("foo", some_symbol);

because foo isn't something in C scope, but more a string-like
identifier...



-- 
If you want to reach me at my work email, use [EMAIL PROTECTED]
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module

2007-11-21 Thread TAKANO Ryousei

I am sorry, I sent an unfinished mail.

  Also, the idea of using a PAUSE frame to add gaps is interesting, but
  you should note that in linux a qdisc may be attached to any network
  device and this for example maybe a PPP device etc. What would you use
  for gaps in that case? 
 
You are right. PSPacer depends on Ethernet, which is not ideal.
At the moment I do not have any ideas for other network devices.
Does anyone have any ideas or suggestions?

Best regards,
Ryousei Takano

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH RFC] [1/9] Core module symbol namespaces code and intro.

2007-11-21 Thread Andi Kleen


 I like this concept in general; I have one minor comment; right now
 your namespace argument is like
 
 EXPORT_SYMBOL_NS(foo, some_symbol);
 
 from a language-like pov I kinda wonder if it's nicer to do
 
 EXPORT_SYMBOL_NS("foo", some_symbol);
 
 because foo isn't something in C scope, but more a string-like
 identifier...

That wouldn't work for MODULE_ALLOW() because it appends the namespace
to other identifiers. I don't know of a way in the C preprocessor to get
back from a string to a ## concatenable identifier.

For EXPORT_SYMBOL_NS it would be in theory possible, but making 
it asymmetric to MODULE_ALLOW would be ugly imho.

-Andi
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH RFC] [1/9] Core module symbol namespaces code and intro.

2007-11-21 Thread Dave Jones

On Thu, Nov 22, 2007 at 03:43:06AM +0100, Andi Kleen wrote:

  There seems to be rough consensus that the kernel currently has too many 
  exported symbols. A lot of these exports are generally usable utility 
  functions or important driver interfaces; but another large part are 
  functions
  intended by only one or two very specific modules for a very specific 
  purpose.
  One example is the TCP code. It has most of its internals exported, but 
  only for use by tcp_ipv6.c (and now a few more by the TCP/IP congestion 
  modules) 
  But it doesn't make sense to include these exported for a specific module
  functions into a broader kernel interface.   External modules assume
  they can use these functions, but they were never intended for that.
  
  This patch allows to export symbols only for specific modules by 
  introducing symbol name spaces. A module name space has a white
  list of modules that are allowed to import symbols for it; all others
  can't use the symbols.

I really like this patchset.   Definitely a step in the right direction imo.
Looks like some nits there that checkpatch will probably pick up on,
but otherwise, looks very straightforward too.

Kudos.

Dave

-- 
http://www.codemonkey.org.uk
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH RFC] [1/9] Core module symbol namespaces code and intro.

2007-11-21 Thread Rusty Russell

On Thursday 22 November 2007 13:43:06 Andi Kleen wrote:
 There seems to be rough consensus that the kernel currently has too many
 exported symbols. A lot of these exports are generally usable utility
 functions or important driver interfaces; but another large part are
 functions intended by only one or two very specific modules for a very
 specific purpose.

Hi Andi,

This is an interesting idea, thanks for the code!  My only question is 
whether we can get most of this benefit by dropping the indirection of 
namespaces and have something like EXPORT_SYMBOL_TO(sym, modname)?  It
doesn't work so well for exporting to a group of modules, but that seems
a reasonable line to draw anyway.

Cheers,
Rusty.
PS.  Probably better to use the standard warnx and errx in modpost, too.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [README] away until Dec 3rd

2007-11-21 Thread Herbert Xu

On Tue, Nov 20, 2007 at 08:29:21PM -0800, David Miller wrote:
 
 During this time Herbert Xu (CC:'d) will take care of both the net-2.6
 stable tree and the net-2.6.25 devel tree.

For this duration please use the net-2.6.25 tree at this location
for basing your patches:

git://git.kernel.org/pub/scm/linux/kernel/git/herbert/net-2.6.25.git/

Please note that this tree has already been rebased compared to
Dave's net-2.6.25 tree.

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC][PATCH 0/3] PSPacer qdisc module

2007-11-21 Thread Ryousei Takano

Hi all,
I sent this mail yesterday, but it was not delivered, so I am resending it.
I am sorry if you receive duplicate mails.


What is PSPacer?

PSPacer (Precise Software Pacer) is a qdisc module which realizes 
precise transmission bandwidth control. It makes bursty traffic which is 
often generated by TCP smooth without any special hardware.

Bursty traffic can degrade the communication performance, because it 
causes buffer overflow at intermediate network nodes and results in 
packet losses. In a bursty traffic, packets are sent back to back. By 
adding a short pause in between the packets, traffic bursts can be 
avoided.

PSPacer controls the interval between outgoing packets very precisely. 
The key idea of PSPacer is to determine transmission timing of packets 
by the number of bytes transferred. If packets are transferred back to 
back, the timing a packet is sent can be determined by the number of 
bytes sent before the packet. PSPacer fills the gaps between time 
aligned real packets (the packets which are sent by user program) by 
gap packets. The real packets and gap packets are sent back to back, 
and thus the timing of transmission of each real packet can be precisely 
controlled by adjusting the gap packet size. As the gap packets, the IEEE 
802.3x PAUSE frames are used. PAUSE frames are discarded at a switch 
input port, and only real packets go through the switch keeping the 
original intervals. 

In the past, some software-based pacing schemes have been proposed. 
These schemes use timer interrupt based packet transmission timing control. 
Therefore, to achieve precise pacing, they require the operating system 
to maintain a high resolution timer, which could incur a large overhead.

The patchset consists of two parts: one part is to be applied to the Linux
kernel, and the other is to be applied to the iproute2.

For detailed description and the usage of PSPacer, please refer to 
our project page (http://www.gridmpi.org/gridtcp.jsp), and the paper
Design and Evaluation of Precise Software Pacing Mechanisms for Fast 
Long-Distance Networks, in PFLDnet2005.


Usage

- setup qdiscs

  (add the PSPacer qdisc as the root qdisc)
  # /sbin/tc qdisc add dev eth0 root handle 1: psp default 1
  (add the PSPacer class whose target rate is 500Mbps)
  # /sbin/tc class add dev eth0 parent 1: classid 1:1 psp rate 500mbit
  (add the PFIFO qdisc as the sub qdisc)
  # /sbin/tc qdisc add dev eth0 parent 1:1 handle 10: pfifo

- run iperf (to confirm the effect of PSPacer)

  $ iperf -c 192.168.1.2 -i 10 -t 60

Client connecting to 192.168.1.2, TCP port 5122
TCP window size: 16.0 KByte (default)
iperf shows payload bandwidth. 476Mbps is the payload bandwidth
when the physical layer bandwidth is 500Mbps and packet size is
1500Bytes

[  3] local 192.168.1.1 port 46457 connected with 192.168.1.2 port 5122
[  3]  0.0-10.0 sec567 MBytes476 Mbits/sec
[  3] 10.0-20.0 sec567 MBytes476 Mbits/sec

- cleanup qdiscs

  (remove the PFIFO sub qdisc)
  # /sbin/tc qdisc del dev eth0 parent 1:1 handle 10:
  (remove the PSPacer class)
  # /sbin/tc class del dev eth0 parent 1: classid 1:1
  (remove the PSPacer qdisc)
  # /sbin/tc qdisc del dev eth0 root handle 1:
  (remove the PSPacer module)
  # /sbin/rmmod sch_psp


Limitations

(1) PSPacer controls the bandwidth according to the ratio of the target
bandwidth to the maximum transmission bandwidth of the system.
Therefore, the system (computer, network interface, operating system, 
buffer settings, etc.) should have a capability to transmit packets at 
the maximum transmission rate (i.e. 1 Gbps for 1000BASE, 100 
Mbps for 100BASE) to realize a precise pacing.

Therefore, if you want to control Gigabit Ethernet traffic, we recommend 
using a PCI-X, 66MHz/64bit PCI or CSA connected network interface. If 
the total of target bandwidth of the output streams is less than 100Mbps,
you can set the network interface to use 100BASE mode so as to obtain
precise pacing.
For the same reason, avoid using a shared switch (dumb hub) for the edge
switch to which the PC with PSPacer is connected.

(2) PSPacer uses the IEEE 802.3x PAUSE frame as the gap between packets. 
Therefore, you can not use the PAUSE frame to stop transmission from the 
switch/router to the PC. Since PSPacer generates PAUSE frames with zero 
pause time, there should not be any side effects other than you can not 
stop transmission from the switch. However, it is recommended to disable 
IEEE 802.3x flow control function of the switch (to which a PC with 
PSPacer is connected) in order to avoid unexpected behavior.

(3) PSPacer does not support TCP Segmentation Offloading (TSO). You have 
to disable TSO by using the ethtool command (ethtool -K eth0 tso off).


Best regards,

[RFC][PATCH] bonding layer2+3 xmit_hash_policy

2007-11-21 Thread Glenn Griffin

I posted this 2 weeks ago on the bonding-devel list, but didn't
receive any feedback so thought I would bring it up to a larger
audience.

Included is a patch for a new xmit_hash_policy for the bonding driver
that selects slaves based on MAC and IP information.  This is a middle
ground between what currently exists in the layer2 only policy and the
layer3+4 policy.  This policy strives to be fully 802.3ad compliant by
transmitting every packet of any particular flow over the same link.
As documented the layer3+4 policy is not fully compliant for extreme
cases such as ip fragmentation, so this policy is a nice compromise
for environments that require full compliance but desire more than the
layer2 only policy.  Comments?

---
 Documentation/networking/bonding.txt |   23 +++
 drivers/net/bonding/bond_main.c  |   26 --
 include/linux/if_bonding.h   |3 ++-
 3 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/bonding.txt
b/Documentation/networking/bonding.txt
index 1134062..5ac84c0 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -587,6 +587,29 @@ xmit_hash_policy
most UDP traffic is not involved in extended
conversations.  Other implementations of 802.3ad may
or may not tolerate this noncompliance.
+   
+   layer2+3
+
+   This policy uses a combination of layer2 and layer3
+   protocol information to generate the hash.
+
+   Uses XOR of hardware MAC addresses and IP addresses to
+   generate the hash.  The formula is
+
+   (((source IP XOR dest IP) AND 0xffff) XOR
+   ( source MAC XOR destination MAC ))
+   modulo slave count
+
+   This algorithm will place all traffic to a particular
+   network peer on the same slave.  For non-IP traffic,
+   the formula is the same as for the layer2 transmit
+   hash policy.
+
+   This policy is intended to provide a more balanced
+   distribution of traffic than layer2 alone, especially
+   in environments where a layer3 gateway device is
+   required to reach most destinations. This algorithm is
+   fully 802.3ad compliant.

The default value is layer2.  This option was added in bonding
 version 2.6.3.  In earlier versions of bonding, this parameter does
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 423298c..a731812 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -174,6 +174,7 @@ struct bond_parm_tbl bond_mode_tbl[] = {
 struct bond_parm_tbl xmit_hashtype_tbl[] = {
 {  layer2,   BOND_XMIT_POLICY_LAYER2},
 {  layer3+4, BOND_XMIT_POLICY_LAYER34},
+{  layer2+3, BOND_XMIT_POLICY_LAYER23},
 {  NULL,   -1},
 };

@@ -3604,6 +3605,24 @@ void bond_unregister_arp(struct bonding *bond)
 /* Hashing Policies -*/

 /*
+ * Hash for the output device based upon layer 2 and layer 3 data. If
+ * the packet is not IP mimic bond_xmit_hash_policy_l2()
+ */
+static int bond_xmit_hash_policy_l23(struct sk_buff *skb,
+   struct net_device *bond_dev, int count)
+{
+   struct ethhdr *data = (struct ethhdr *)skb-data;
+   struct iphdr *iph = ip_hdr(skb);
+
+   if (skb-protocol == __constant_htons(ETH_P_IP)) {
+   return ((ntohl(iph-saddr ^ iph-daddr)  0x) ^
+   (data-h_dest[5] ^ bond_dev-dev_addr[5])) % count;
+   }
+
+   return (data-h_dest[5] ^ bond_dev-dev_addr[5]) % count;
+}
+
+/*
  * Hash for the output device based upon layer 3 and layer 4 data. If
  * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is
  * altogether not IP, mimic bond_xmit_hash_policy_l2()
@@ -4323,6 +4342,8 @@ void bond_set_mode_ops(struct bonding *bond, int mode)
bond_dev-hard_start_xmit = bond_xmit_xor;
if (bond-params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
bond-xmit_hash_policy = bond_xmit_hash_policy_l34;
+   else if (bond-params.xmit_policy == BOND_XMIT_POLICY_LAYER23)
+   bond-xmit_hash_policy = bond_xmit_hash_policy_l23;
else
bond-xmit_hash_policy = bond_xmit_hash_policy_l2;
break;
@@ -4334,6 +4355,8 @@ void bond_set_mode_ops(struct bonding *bond, int mode)
bond_dev-hard_start_xmit = bond_3ad_xmit_xor;
if (bond-params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
bond-xmit_hash_policy = bond_xmit_hash_policy_l34;
+   else if (bond-params.xmit_policy == BOND_XMIT_POLICY_LAYER23)
+

96 matches

Mail list logo