Re: RE: is it useful testing __LINK_STATE_RX_SCHED in dev_close()?
On Wed, Nov 21, 2007 at 03:09:52PM +0800, [EMAIL PROTECTED] wrote: __LINK_STATE_RX_SCHED still exists in kernel 2.6.23.8. You'll find that it's gone in 2.6.24-rc3. In any case, the code was racy but the race is too unlikely (and the fix too intrusive) to be worth fixing in 2.6.23 at this stage. Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 2.6.24-rc3: find complains about /proc/net
Eric W. Biederman wrote: Below is a preliminary patch. It solves the directory issue but it doesn't play well with proc_mnt and proc_flush_task. It works by simply caching the network namespace when we mount proc so we don't have to be fancy and dynamic. Nice... Where should we apply this patch to? Something for the discussion anyway. I will start sorting out what makes sense tomorrow. Eric From f359fde2469ba8be2123a465e788a83c7e6831e9 Mon Sep 17 00:00:00 2001 From: Eric W. Biederman [EMAIL PROTECTED] Date: Tue, 20 Nov 2007 19:36:05 -0700 Subject: [PATCH] proc: Fix /proc/net directory listings. Having proc dynamically display the contents of /proc/net is hard. So make life simpler by capturing the network namespace when we mount proc and only displaying that network namespace. --- fs/proc/base.c |8 ++-- fs/proc/generic.c |4 ++- fs/proc/internal.h | 13 +++ fs/proc/proc_net.c | 89 --- fs/proc/root.c | 50 ++ include/linux/proc_fs.h |4 ++ 6 files changed, 66 insertions(+), 102 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index aeaf0d0..9d4f06a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2395,7 +2395,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct if (tgid == ~0U) goto out; - ns = dentry-d_sb-s_fs_info; + ns = proc_sbi(dentry-d_sb)-pid_ns; rcu_read_lock(); task = find_task_by_pid_ns(tgid, ns); if (task) @@ -2476,7 +2476,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) goto out; } - ns = filp-f_dentry-d_sb-s_fs_info; + ns = proc_sbi(filp-f_dentry-d_sb)-pid_ns; tgid = filp-f_pos - TGID_OFFSET; for (task = next_tgid(tgid, ns); task; @@ -2615,7 +2615,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry if (tid == ~0U) goto out; - ns = dentry-d_sb-s_fs_info; + ns = proc_sbi(dentry-d_sb)-pid_ns; rcu_read_lock(); task = find_task_by_pid_ns(tid, ns); if (task) @@ -2758,7 +2758,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t 
filldi /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ - ns = filp-f_dentry-d_sb-s_fs_info; + ns = proc_sbi(filp-f_dentry-d_sb)-pid_ns; tid = (int)filp-f_version; filp-f_version = 0; for (task = first_tid(leader, tid, pos - 2, ns); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 1bdb624..b58f0ec 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -398,7 +398,9 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam continue; if (!memcmp(dentry-d_name.name, de-name, de-namelen)) { unsigned int ino = de-low_ino; - + + if (de-shadow_proc) + de = de-shadow_proc(dentry-d_sb, de); de_get(de); spin_unlock(proc_subdir_lock); error = -EINVAL; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 1820eb2..a26f115 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,6 +11,18 @@ #include linux/proc_fs.h +struct pid_namespace; +struct net; +struct proc_sb_info { + struct pid_namespace *pid_ns; + struct net *net_ns; +}; + +static inline struct proc_sb_info *proc_sbi(struct super_block *sb) +{ + return sb-s_fs_info; +} + #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); #else @@ -78,3 +90,4 @@ static inline int proc_fd(struct inode *inode) { return PROC_I(inode)-fd; } + diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 131f9c6..8a82e29 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -50,89 +50,15 @@ struct net *get_proc_net(const struct inode *inode) } EXPORT_SYMBOL_GPL(get_proc_net); -static struct proc_dir_entry *proc_net_shadow; +static struct proc_dir_entry *shadow_pde; -static struct dentry *proc_net_shadow_dentry(struct dentry *parent, - struct proc_dir_entry *de) +static struct proc_dir_entry *proc_net_shadow(struct super_block *sb, + struct proc_dir_entry *de) { - struct dentry *shadow = NULL; - struct inode *inode; - if (!de) - goto out; - de_get(de); - inode = 
proc_get_inode(parent-d_inode-i_sb, de-low_ino, de); - if (!inode) - goto out_de_put; -
Re: HTB/HSFC shaping precision
On 20-11-2007 22:21, Denys Fedoryshchenko wrote: ... If traffic is dropped - it will be resent, a lot of energy will be wasted for nothing. The same bytes will travel all the long way around the earth just because I am not able to manage my QoS box :-) Sure, but you'll probably use almost every bit you've paid for! Plus uplink bandwidth will be used for that; I am using my own protocol (it is a TCP accelerator for satellite communications based on NACK and streaming compression), so each resend means a few bytes more on the uplink and additional delay. Ah yes, even a resend over TCP means more delay than if the packet were queued for a few milliseconds at the bottleneck. Plus if the buffer on the STM-1 interface is way too small, the smallest spike will cause packet loss, and the situation can be far away from congestion. As a result it will be very difficult to reach maximum bandwidth on such a link. And a Linux box in this situation is a magic box, which can help to save energy, hungry people and help to use resources efficiently :-) I'm still not sure how this traffic goes around, because e.g., if you receive something through a satellite, then it would only make sense if it were controlled earlier to the same speed too. Otherwise you should have this dropping on your HTB (of course you could use big buffers, but anyway...), instead of STM, but resending could be similar. But, if you have full control on your side, it looks like a kind of realtime traffic, and then HFSC should be more appropriate for this (but I only 'heard' about this). Yes, for sure. That's what I am reading almost every day, when I don't understand something clearly. But my English is far from good, so sometimes I just misunderstand something even in a good manual. Then good news: read the code! There is really as little English as possible... Cheers, Jarek P. - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module
This patch includes the PSPacer (Precise Software Pacer) qdisc module, which achieves precise transmission bandwidth control. You can find more information at the project web page (http://www.gridmpi.org/gridtcp.jsp). Signed-off-by: Ryousei Takano [EMAIL PROTECTED] --- include/linux/pkt_sched.h | 38 ++ net/sched/Kconfig |9 + net/sched/Makefile|1 + net/sched/sch_psp.c | 959 + 4 files changed, 1007 insertions(+), 0 deletions(-) create mode 100644 net/sched/sch_psp.c diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 919af93..d3f8afd 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -430,6 +430,44 @@ enum { #define TCA_ATM_MAX(__TCA_ATM_MAX - 1) +/* Precise Software Pacer section */ + +#define TC_PSP_MAXDEPTH (8) + +typedef long long gapclock_t; + +enum { + MODE_NORMAL = 0, + MODE_STATIC = 1, +}; + +struct tc_psp_copt +{ + __u32 level; + __u32 mode; + __u32 rate; +}; + +struct tc_psp_qopt +{ + __u32 defcls; + __u32 rate; + __u32 direct_pkts; +}; + +struct tc_psp_xstats +{ + __u32 bytes; /* gap packet statistics */ + __u32 packets; +}; + +enum +{ + TCA_PSP_UNSPEC, + TCA_PSP_COPT, + TCA_PSP_QOPT, +}; + /* Network emulator */ enum diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 9c15c48..ec40e43 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -184,6 +184,15 @@ config NET_SCH_DSMARK To compile this code as a module, choose M here: the module will be called sch_dsmark. +config NET_SCH_PSP + tristate Precise Software Pacer (PSP) + ---help--- + Say Y here if you want to include PSPacer module, which means + that you will be able to control precise pacing. + + To compile this driver as a module, choose M here: the + module will be called sch_psp. 
+ config NET_SCH_NETEM tristate Network emulator (NETEM) ---help--- diff --git a/net/sched/Makefile b/net/sched/Makefile index 81ecbe8..85425c2 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o +obj-$(CONFIG_NET_SCH_PSP) += sch_psp.o obj-$(CONFIG_NET_SCH_NETEM)+= sch_netem.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/sch_psp.c b/net/sched/sch_psp.c new file mode 100644 index 000..5c56742 --- /dev/null +++ b/net/sched/sch_psp.c @@ -0,0 +1,959 @@ +/* + * net/sched/sch_psp.c PSPacer: Precise Software Pacer + * + * Copyright (C) 2004-2007 National Institute of Advanced + * Industrial Science and Technology (AIST), Japan. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors:Ryousei Takano, [EMAIL PROTECTED] + */ + +#include linux/module.h +#include linux/types.h +#include linux/kernel.h +#include linux/netdevice.h +#include linux/skbuff.h +#include linux/rtnetlink.h +#include linux/ethtool.h +#include linux/if_arp.h +#include linux/in.h +#include linux/ip.h +#include net/pkt_sched.h + +/* PSPacer achieves precise rate regulation results, and no microscopic + * burst transmission which exceeds the limit is generated. + * + * The basic idea is that transmission timing can be precisely controlled, + * if packets are sent back-to-back at the wire rate. PSPacer controls + * the packet transmision intervals by inserting additional packets, + * called gap packets, between adjacent packets. The transmission interval + * can be controlled accurately by adjusting the number and size of the gap + * packets. 
PSPacer uses the 802.3x PAUSE frame as the gap packet. + * + * For the purpose of adjusting the gap size, this Qdisc maintains a byte + * clock which is recorded by a total transmitted byte per connection. + * Each sub-class has a class local clock which is used to make decision + * whether to send a packet or not. If there is not any packets to send, + * gap packets are inserted. + * + * References: + * [1] R.Takano, T.Kudoh, Y.Kodama, M.Matsuda, H.Tezuka, and Y.Ishikawa, + * Design and Evaluation of Precise Software Pacing Mechanisms for + * Fast Long-Distance Networks, PFLDnet2005. + * [2] http://www.gridmpi.org/gridtcp.jsp + */ + +#define HW_GAP (16)/* Preamble(8) + Inter Frame Gap(8) */ +#define FCS(4) /* Frame Check Sequence(4) */ +#define MIN_GAP (64) /* Minimum size of gap packet */ +#define MIN_TARGET_RATE (1000) /* 1 KB/s (= 8
[RFC][PATCH 2/3] TC: PSPacer qdisc module
This patch includes the PSPacer (Precise Software Pacer) qdisc tc part, which achieves precise transmission bandwidth control. You can find more information at the project web page (http://www.gridmpi.org/gridtcp.jsp). Signed-off-by: Ryousei Takano [EMAIL PROTECTED] --- include/linux/pkt_sched.h | 38 + tc/Makefile |1 + tc/q_psp.c| 200 + 3 files changed, 239 insertions(+), 0 deletions(-) create mode 100644 tc/q_psp.c diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 268c515..c708082 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -430,6 +430,44 @@ enum { #define TCA_ATM_MAX(__TCA_ATM_MAX - 1) +/* Precise Software Pacer section */ + +#define TC_PSP_MAXDEPTH (8) + +typedef long long gapclock_t; + +enum { + MODE_NORMAL = 0, + MODE_STATIC = 1, +}; + +struct tc_psp_copt +{ + __u32 level; + __u32 mode; + __u32 rate; +}; + +struct tc_psp_qopt +{ + __u32 defcls; + __u32 rate; + __u32 direct_pkts; +}; + +struct tc_psp_xstats +{ + __u32 bytes; /* gap packet statistics */ + __u32 packets; +}; + +enum +{ + TCA_PSP_UNSPEC, + TCA_PSP_COPT, + TCA_PSP_QOPT, +}; + /* Network emulator */ enum diff --git a/tc/Makefile b/tc/Makefile index a715566..836df9d 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -12,6 +12,7 @@ TCMODULES += q_prio.o TCMODULES += q_tbf.o TCMODULES += q_cbq.o TCMODULES += q_rr.o +TCMODULES += q_psp.o TCMODULES += q_netem.o TCMODULES += f_rsvp.o TCMODULES += f_u32.o diff --git a/tc/q_psp.c b/tc/q_psp.c new file mode 100644 index 000..e3f4cf7 --- /dev/null +++ b/tc/q_psp.c @@ -0,0 +1,200 @@ +/* + * q_psp.c PSPacer: Precise Software Pacer + * + * Copyright (C) 2004-2007 National Institute of Advanced + * Industrial Science and Technology (AIST), Japan. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors:Ryousei Takano, [EMAIL PROTECTED] + * + */ + +#include stdio.h +#include stdlib.h +#include unistd.h +#include syslog.h +#include fcntl.h +#include sys/socket.h +#include netinet/in.h +#include arpa/inet.h +#include string.h + +#include utils.h +#include tc_util.h + +static void explain(void) +{ + fprintf(stderr, +Usage: ... qdisc add ... psp [ default N ] [rate RATE]\n + default minor id of class to which unclassified packets are sent {0}\n + rate physical interface bandwidth\n\n +... class add ... psp mode M [ rate MBPS ]\n + mode target rate estimation method (NORMAL=0 STATIC=1) {0}\n + rate rate allocated to this class\n); +} + +static void explain1(char *arg) +{ + fprintf(stderr, Illegal \%s\\n, arg); + explain(); +} + + +static int psp_parse_opt(struct qdisc_util *qu, int argc, char **argv, +struct nlmsghdr *n) +{ + struct tc_psp_qopt qopt; + struct rtattr *tail; + memset(qopt, 0, sizeof(qopt)); + + while (argc 0) { + if (matches(*argv, rate) == 0) { + NEXT_ARG(); + if (get_rate(qopt.rate, *argv)) { + explain1(rate); + return -1; + } + } else if (matches(*argv, default) == 0) { + NEXT_ARG(); + if (get_u32(qopt.defcls, *argv, 16)) { + explain1(default); + return -1; + } + } else if (matches(*argv, help) == 0) { + explain(); + return -1; + } else { + fprintf(stderr, What is \%s\?\n, *argv); + explain(); + return -1; + } + argc--; + argv++; + } + + tail = NLMSG_TAIL(n); + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); + addattr_l(n, 2024, TCA_OPTIONS, qopt, NLMSG_ALIGN(sizeof(qopt))); + tail-rta_len = (void *) NLMSG_TAIL(n) - (void *) tail; + return 0; +} + +static int psp_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) +{ + struct rtattr *tb[TCA_PSP_QOPT+1]; + struct tc_psp_copt *copt; + struct tc_psp_qopt *qopt; + SPRINT_BUF(b); + + if (opt == NULL) + return 0; + + memset(tb, 0, sizeof(tb)); + parse_rtattr_nested(tb, TCA_PSP_QOPT, opt); + + if (tb[TCA_PSP_COPT]) { + copt = RTA_DATA(tb[TCA_PSP_COPT]); + if (RTA_PAYLOAD(tb[TCA_PSP_COPT]) 
sizeof(*copt)) +
[RFC][PATCH 3/3] TC: PSPacer man page
This patch includes the man page of the PSPacer (Precise Software Pacing) qdisc module. Signed-off-by: Ryousei Takano [EMAIL PROTECTED] --- man/man8/tc-psp.8 | 166 + 1 files changed, 166 insertions(+), 0 deletions(-) create mode 100644 man/man8/tc-psp.8 diff --git a/man/man8/tc-psp.8 b/man/man8/tc-psp.8 new file mode 100644 index 000..a6e26bf --- /dev/null +++ b/man/man8/tc-psp.8 @@ -0,0 +1,166 @@ +.TH PSP 8 13 October 2007 iproute2 Linux +.SH NAME +PSP \- Precise Software Pacer +.SH SYNOPSIS +.B tc qdisc ... dev +dev +.B ( parent +classid +.B | root) [ handle +major: +.B ] psp [ default +minor-id +.B ] [ rate +rate +.B ] + +.B tc class ... dev +dev +.B parent +major:[minor] +.B [ classid +major:minor +.B ] psp rate +rate +.B ] [ mode +mode +.B ] + +.SH DESCRIPTION +Precise Software Pacer (PSPacer) is a classful queuing discipline +which controls traffic with +.BR tc (8) +command. +PSP achieves a precise pacing per class. + +.SH GAP PACKET +The key to realizing precise pacing is to control the starting time of +the transmission of each packet. We propose a simple yet accurate +mechanism to trigger the transmission of a packet. That is, to insert +a gap packet between the real packets. The gap packet produces a gap +between sequentially transmitted real packets. +We employ a PAUSE packet as a gap packet. A PAUSE packet is defined in +the IEEE 802.3x flow control. + +By changing the gap packet size, the starting time of +the next real packet transmission can be precisely controlled. +For example, to control a half rate transmission, a gap packet is inserted +between every real packet where the gap packet size is the same as +that of the real packets. + +.SH BYTE CLOCK SCHEDULING +Packet transmission is scheduled based on the inter-packet gap of each +class (i.e. target rate). +If the network has multiple bottleneck links, it is necessary to +schedule the order of packet transmission and the packet interval. 
+ +PSPacer maintains a virtual clock which is counted by the total transmitted +byte instead of real time clock. Each sub-class has its local clock +which is used to make decision whether to send a packet or not. +If there is an idle time, a gap packet is inserted. + +.SH CLASSIFICATION +Within one PSP instance, many classes may exist. Each of these classes +contains its own qdisc. + +When enqueuing a packet, PSP starts at the root and uses various methods to +determine which class should be used to obtain the data to be enqueued. + +In the standard configuration, this process is rather easy. +At each node we look for an instruction, and then go to the class the +instruction refers to. If the class found is a leaf-node (without +children), we enqueue the packet there. If it is not yet a leaf node, we do +the same thing over again starting from that node. + +The following actions are performed in order at each node we visit, until +move to another node, or terminates the process. +.TP +(i) +Consult filters attached to the class. If we are at a leaf node, we are done. +Otherwise, restart. +.TP +(ii) +If none of the above returned with an instruction, send to the default class. +.P +./ This algorithm makes sure that a packet always ends up somewhere, even while +./ you are busy building your configuration. + +.SH QDISC +The root of a PSP qdisc class tree has the following parameters: + +.TP +parent major:minor | root +This mandatory parameter determines the place of the PSP instance, +either at the +.B root +of an interface or within an existing class. +.TP +handle major: +Like all other qdiscs, the PSP can be assigned a handle. It should consist only +of a major number, followed by a colon. Optional, but it is very useful +if classes will be generated within this qdisc. +.TP +default minor-id +Unclassified traffic is sent to the class with this minor-id. +.TP +rate rate +Optional. You can explicitly specify the maximum transmission rate. 
+For example, if a 33MHz/32bit PCI bus is used to connect a Gigabit +Ethernet network interface, the bottleneck is the PCI bus, and the +system can not transmit packets at the rate of gigabit/sec. + +.SH CLASSES +Classes have a host of parameters to configure their operation. + +.TP +parent major:minor +Specifies the place of this class within the hierarchy. If attached directly +to a qdisc and not to another class, minor can be omitted. Mandatory. +.TP +classid major:minor +Like qdiscs, classes can be named. The major number must be equal to the +major number of the qdisc to which it belongs. Optional, but needed if this +class is going to have children. +.TP +rate rate +Maximum transmission rate this class including all its children are assigned. +Optional, but required if this class is set to mode 1 (static target rate). +.TP +mode mode +Range from 0 to 1. The mode 0 is without pacing. The mode 1 is +pacing based on static target rate estimation.
Re: HTB/HSFC shaping precision
On Wed, 21 Nov 2007 10:47:10 +0100, Jarek Poplawski wrote: But, if you have full control on your side, it looks like a kind of realtime traffic, and then HFSC should be more appropriate for this (but I only 'heard' about this). One message later, that's what I dreamed about :-) Subject: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module On the website they have a very good explanation... http://www.gridmpi.org/gridtcp.jsp -- Denys Fedoryshchenko Technical Manager Virtual ISP S.A.L. - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module
Ryousei Takano wrote: This patch includes the PSPacer (Precise Software Pacer) qdisc module, which achieves precise transmission bandwidth control. You can find more information at the project web page (http://www.gridmpi.org/gridtcp.jsp). Looks good, but please run checkpatch over it. A few more comments below. +/* Precise Software Pacer section */ + +#define TC_PSP_MAXDEPTH (8) + +typedef long long gapclock_t; + +enum { + MODE_NORMAL = 0, + MODE_STATIC = 1, +}; + +struct tc_psp_copt +{ + __u32 level; + __u32 mode; + __u32 rate; What unit is rate measures in? Is 32 bit really enough? +static struct sk_buff *alloc_gap_packet(struct Qdisc* sch, int size) +{ + struct sk_buff *skb; + struct net_device *dev = sch-dev; + unsigned char *pkt; + int pause_time = 0; + int pktsize = size + 2; + + skb = alloc_skb(pktsize, GFP_ATOMIC); + if (!skb) + return NULL; + + skb_reserve(skb, 2); + + pkt = skb-data; + memset(pkt, 0xff, pktsize); + pkt[0] = 0x01; /* dst address: 01:80:c2:00:00:01 */ + pkt[1] = 0x80; + pkt[2] = 0xc2; + pkt[3] = 0x00; + pkt[4] = 0x00; + pkt[5] = 0x01; + memcpy(pkt + 6, dev-dev_addr, ETH_ALEN /* dev-addr_len */); + + pkt[12] = 0x88; /* MAC control:88 08 */ + pkt[13] = 0x08; + pkt[14] = 0;/* MAC control opcode: 00 01 */ + pkt[15] = 1; A few #defines for all these magic values and a struct for the header would make this nicer. + pkt[16] = pause_time 8; + pkt[17] = pause_time; + + skb_put(skb, size); + + skb-dev = sch-dev; + skb-protocol = ETH_P_802_3; + skb_reset_network_header(skb); /* It is refered at dev_queue_xmit_nit(). */ + + return skb; +} + +static struct psp_class *psp_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qres) +{ + struct psp_sched_data *q = qdisc_priv(sch); + struct psp_class *cl; + struct tcf_result res; + struct tcf_proto *tcf; + int result; + + if ((cl = psp_find(skb-priority, sch)) != NULL cl-level == 0) + return cl; + tcf = q-filter_list; This should handle tc actions. 
+ if (tcf (result = tc_classify(skb, tcf, res)) = 0) { It seems you can have a hierarchy of classes, so why aren't you classifying recursively? + if ((cl = (struct psp_class *)res.class) == NULL) { + if ((cl = psp_find(res.classid, sch)) == NULL) { + /* filter selected invalid classid */ + goto try_default; + } + } + if (is_leaf_class(cl)) + return cl; /* hit leaf class */ + + /* apply inner filter chain */ + tcf = cl-filter_list; + } + + try_default: + /* classification failed, try default class */ + cl = psp_find(TC_H_MAKE(TC_H_MAJ(sch-handle), q-defcls), sch); + if (cl == NULL || cl-level 0) + return PSP_DIRECT; I'd prefer if you don't follow the HTB way of using a direct class for unclassified packets, it makes noticing when classification is incomplete harder and thats what the default class is for. + return cl; +} + +static inline void psp_activate(struct psp_sched_data *q, struct psp_class *cl) +{ + cl-activity |= FLAG_ACTIVE; + list_add_tail(cl-dlist, q-drop_list); +} + +static inline void psp_deactivate(struct psp_sched_data *q, + struct psp_class *cl) +{ + cl-activity = MASK_ACTIVE; MASK_ACTIVE is misleading, its MASK_INACTIVE. I'd suggest to simply use = ~FLAG_ACTIVE or cl-q.qlen != 0 (which indicates an active class). 
+ list_del_init(cl-dlist); +} + +#define COUNT(x, y) (((x) + ((y) - 1)) / (y)) DIV_ROUND_UP +static void add_leaf_class(struct psp_sched_data *q, struct psp_class *cl) +{ + struct psp_class *p; + int mtu = q-mtu + FCS; + + /* chain normal/pacing class list */ + switch (cl-mode) { + case MODE_NORMAL: + list_add_tail(cl-plist, q-normal_list); + break; + case MODE_STATIC: + cl-gapsize = (((q-max_rate / 1000) * mtu) + / (cl-rate / 1000)) - mtu; + cl-gapsize -= (HW_GAP + FCS) * COUNT(q-max_rate, cl-rate); + cl-gapsize = max_t(int, cl-gapsize, MIN_GAP); + cl-activity |= FLAG_DMARK; + list_for_each_entry(p, q-pacing_list, plist) { + if (cl-gapsize p-gapsize) + break; + } + list_add_tail(cl-plist, p-plist); + break; + } +} + +static int recalc_gapsize(struct sk_buff* skb, struct Qdisc *sch) +{ + struct psp_sched_data *q = qdisc_priv(sch); + int ret; +
[PATCH] Add packet filtering based on process's security context.
Hello. This patch comes from a thread at http://lkml.org/lkml/2007/11/16/155 . I want to use IP/port based access control for incoming connections/datagrams. This idea has been discussed several times, but there is no approach that satisfies both requirements: that it can decide based on the recipient's process, and that it can sleep so that the LSM hook can query userspace for an access decision. Thus, I'd like to add one new LSM hook in net/core/datagram.c and change the return type of one existing LSM hook in net/socket.c . Regards. - Subject: Add packet filtering based on process's security context. This patch allows LSM modules to filter incoming connections/datagrams based on the security context of the process that is attempting to pick them up. There are already hooks to filter incoming connections/datagrams based on the socket's security context, but these hooks are not applicable when one wants to do TCP Wrapper-like filtering (e.g. App1 is permitted to accept TCP connections from 192.168.0.0/16). Precautions: This approach has a side effect that is unlikely to occur. If a socket is shared by multiple processes with different policies, the process that should be able to accept this connection will not be able to accept it because socket_post_accept() aborts this connection. But if socket_post_accept() doesn't abort this connection, the process that must not be able to accept this connection will repeat accept() forever, which is a worse side effect. Similarly, if a socket is shared by multiple processes with different policies, the process that should be able to pick up this datagram will not be able to pick it up because socket_post_recv_datagram() discards this datagram. But if socket_post_recv_datagram() doesn't discard this datagram, the process that must not be able to pick up this datagram will repeat recvmsg() forever, which is a worse side effect. So, don't give different permissions to processes that share one socket.
Otherwise, some connections/datagrams cannot be delivered to intended process. Signed-off-by: Kentaro Takeda [EMAIL PROTECTED] Signed-off-by: Tetsuo Handa [EMAIL PROTECTED] include/linux/security.h | 34 +- net/core/datagram.c | 26 -- net/socket.c |7 +-- security/dummy.c | 13 ++--- security/security.c | 10 -- 5 files changed, 76 insertions(+), 14 deletions(-) --- linux-2.6-mm.orig/include/linux/security.h +++ linux-2.6-mm/include/linux/security.h @@ -778,8 +778,12 @@ struct request_sock; * @socket_post_accept: * This hook allows a security module to copy security * information into the newly created socket's inode. + * This hook also allows a security module to filter connections + * from unwanted peers based on the process accepting this connection. + * The connection will be aborted if this hook returns nonzero. * @sock contains the listening socket structure. * @newsock contains the newly created server socket for connection. + * Return 0 if permission is granted. * @socket_sendmsg: * Check permission before transmitting a message to another socket. * @sock contains the socket structure. @@ -793,6 +797,15 @@ struct request_sock; * @size contains the size of message structure. * @flags contains the operational flags. * Return 0 if permission is granted. + * @socket_post_recv_datagram: + * Check permission after receiving a datagram. + * This hook allows a security module to filter packets + * from unwanted peers based on the process receiving this datagram. + * The packet will be discarded if this hook returns nonzero. + * @sk contains the socket. + * @skb contains the socket buffer (may be NULL). + * @flags contains the operational flags. + * Return 0 if permission is granted. * @socket_getsockname: * Check permission before the local address (name) of the socket object * @sock is retrieved. 
@@ -1389,12 +1402,13 @@ struct security_operations { struct sockaddr * address, int addrlen); int (*socket_listen) (struct socket * sock, int backlog); int (*socket_accept) (struct socket * sock, struct socket * newsock); - void (*socket_post_accept) (struct socket * sock, - struct socket * newsock); + int (*socket_post_accept) (struct socket *sock, struct socket *newsock); int (*socket_sendmsg) (struct socket * sock, struct msghdr * msg, int size); int (*socket_recvmsg) (struct socket * sock, struct msghdr * msg, int size, int flags); + int (*socket_post_recv_datagram) (struct sock *sk, struct sk_buff *skb, + unsigned int flags); int (*socket_getsockname) (struct socket * sock); int (*socket_getpeername) (struct socket *
[RFC/PATCH] SO_NO_CHECK for IPv6
SO_NO_CHECK support for IPv6 appeared to be missing. This is presented, based on a reading of net/ipv4/udp.c. I wonder if IPv4's CHECKSUM_PARTIAL check from udp_push_pending_frames() also needs to be copied to IPv6? Signed-off-by: Jeff Garzik [EMAIL PROTECTED] --- net/ipv6/udp.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ee1cc3f..7927e69 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -538,9 +538,14 @@ static int udp_v6_push_pending_frames(struct sock *sk) uh-len = htons(up-len); uh-check = 0; - if (up-pcflag) + if (up-pcflag) /* UDP-Lite */ csum = udplite_csum_outgoing(sk, skb); -else + + else if (sk-sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ + skb-ip_summed = CHECKSUM_NONE; + goto send; + + } else csum = udp_csum_outgoing(sk, skb); /* add protocol-dependent pseudo-header */ @@ -549,6 +554,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) if (uh-check == 0) uh-check = CSUM_MANGLED_0; +send: err = ip6_push_pending_frames(sk); out: up-len = 0; - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] amd8111e: don't call napi_enable if configured w/o NAPI
The amd8111e network driver was broken by bea3348eef27e6044b6161fd04c3152215f96411, which makes the driver call napi_enable() and napi_disable() even if the driver had been configured without CONFIG_AMD8111E_NAPI, and thus netif_napi_add() had not been called on initialization. This triggers a BUG in napi_enable(). This patch fixes the problem. Please apply. Signed-off-by: Jiri Bohac [EMAIL PROTECTED] diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c index eebf5bb..e7fdd81 100644 --- a/drivers/net/amd8111e.c +++ b/drivers/net/amd8111e.c @@ -1340,7 +1340,9 @@ static int amd8111e_close(struct net_device * dev) struct amd8111e_priv *lp = netdev_priv(dev); netif_stop_queue(dev); +#ifdef CONFIG_AMD8111E_NAPI napi_disable(lp-napi); +#endif spin_lock_irq(lp-lock); @@ -1372,7 +1374,9 @@ static int amd8111e_open(struct net_device * dev ) dev-name, dev)) return -EAGAIN; +#ifdef CONFIG_AMD8111E_NAPI napi_enable(lp-napi); +#endif spin_lock_irq(lp-lock); @@ -1380,7 +1384,9 @@ static int amd8111e_open(struct net_device * dev ) if(amd8111e_restart(dev)){ spin_unlock_irq(lp-lock); +#ifdef CONFIG_AMD8111E_NAPI napi_disable(lp-napi); +#endif if (dev-irq) free_irq(dev-irq, dev); return -ENOMEM; -- Jiri Bohac [EMAIL PROTECTED] SUSE Labs, SUSE CZ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: ZD1211RW unaligned accesses...
Hi David, David Miller wrote: Shaddy I attach a hack patch that you can use which should get rid of the warnings. It hasn't seemed to. I patched the source (confirming the patched lines are in), compiled, installed and rebooted to effect the changes. My zd1211rw modules timestamp indicates that I have an updated module: $ ls -l /lib/modules/2.6.22/kernel/drivers/net/wireless/zd1211rw/zd1211rw.ko -rw-r--r-- 1 root root 84536 2007-11-21 23:18 /lib/modules/2.6.22/kernel/drivers/net/wireless/zd1211rw/zd1211rw.ko lsmod confirms the module is loaded. After activating the interface (without configuring it yet): $ ifconfig eth2 up I start getting the messages over and over on the console: Kernel unaligned access at TPC[100ee624] do_rx+0x394/0x5ec [zd1211rw] Kernel unaligned access at TPC[100ee62c] do_rx+0x39c/0x5ec [zd1211rw] Kernel unaligned access at TPC[100ee638] do_rx+0x3a8/0x5ec [zd1211rw] Sorry that this has not been successful this time, but thanks for your help. I will be trying to follow-up on some of the other questions put to me. Regards, Shaddy - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH] SO_NO_CHECK for IPv6
In article [EMAIL PROTECTED] (at Wed, 21 Nov 2007 07:45:32 -0500), Jeff Garzik [EMAIL PROTECTED] says: SO_NO_CHECK support for IPv6 appeared to be missing. This is presented, based on a reading of net/ipv4/udp.c. Disagree. UDP checksum is mandatory in IPv6. --yoshfuji - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH] SO_NO_CHECK for IPv6
On Wed, Nov 21, 2007 at 01:20:51PM +, YOSHIFUJI Hideaki / 吉藤英明 wrote: In article [EMAIL PROTECTED] (at Wed, 21 Nov 2007 07:45:32 -0500), Jeff Garzik [EMAIL PROTECTED] says: SO_NO_CHECK support for IPv6 appeared to be missing. This is presented, based on a reading of net/ipv4/udp.c. Disagree. UDP checksum is mandatory in IPv6. Right, IPv6 doesn't have a header checksum so the UDP checksum must be there. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] dm9000 - fix spinlock issue, updated
The patch below fixes the problem with dm9000_timeout function: it calls dm9000_init under the spin_lock db-lock, which was going to be acquired again in dm9000_hash_table. From the other hand, dm9000_hash_table has to be called with db-lock held Signed-off-by: dmitry pervushin [EMAIL PROTECTED] Index: linux/drivers/net/dm9000.c === --- linux.orig/drivers/net/dm9000.c +++ linux/drivers/net/dm9000.c @@ -173,6 +173,7 @@ static void dm9000_phy_write(struct net_ static u16 read_srom_word(board_info_t *, int); static void dm9000_rx(struct net_device *); static void dm9000_hash_table(struct net_device *); +static void dm9000_set_multicast(struct net_device *dev); //#define DM9000_PROGRAM_EEPROM #ifdef DM9000_PROGRAM_EEPROM @@ -556,7 +557,7 @@ dm9000_probe(struct platform_device *pde ndev-tx_timeout = dm9000_timeout; ndev-watchdog_timeo = msecs_to_jiffies(watchdog); ndev-stop = dm9000_stop; - ndev-set_multicast_list = dm9000_hash_table; + ndev-set_multicast_list = dm9000_set_multicast; #ifdef CONFIG_NET_POLL_CONTROLLER ndev-poll_controller= dm9000_poll_controller; #endif @@ -620,6 +621,7 @@ static int dm9000_open(struct net_device *dev) { board_info_t *db = (board_info_t *) dev-priv; + unsigned long flags; PRINTK2(entering dm9000_open\n); @@ -627,8 +629,10 @@ dm9000_open(struct net_device *dev) return -EAGAIN; /* Initialize DM9000 board */ + spin_lock_irqsave(db-lock, flags); dm9000_reset(db); dm9000_init_dm9000(dev); + spin_unlock_irqrestore(db-lock, flags); /* Init driver variable */ db-dbug_cnt = 0; @@ -1030,6 +1034,18 @@ cal_CRC(unsigned char *Data, unsigned in /* * Set DM9000 multicast address */ + +static void +dm9000_set_multicast(struct net_device *dev) +{ + board_info_t *db = (board_info_t *) dev-priv; + unsigned long flags; + + spin_lock_irqsave(db-lock,flags); + dm9000_hash_table(dev); + spin_unlock_irqrestore(db-lock, flags); +} + static void dm9000_hash_table(struct net_device *dev) { @@ -1038,12 +1054,9 @@ dm9000_hash_table(struct net_device *dev int mc_cnt 
= dev-mc_count; u32 hash_val; u16 i, oft, hash_table[4]; - unsigned long flags; PRINTK2(dm9000_hash_table()\n); - spin_lock_irqsave(db-lock,flags); - for (i = 0, oft = 0x10; i 6; i++, oft++) iow(db, oft, dev-dev_addr[i]); @@ -1065,8 +1078,6 @@ dm9000_hash_table(struct net_device *dev iow(db, oft++, hash_table[i] 0xff); iow(db, oft++, (hash_table[i] 8) 0xff); } - - spin_unlock_irqrestore(db-lock,flags); } @@ -1155,12 +1166,15 @@ dm9000_drv_resume(struct platform_device { struct net_device *ndev = platform_get_drvdata(dev); board_info_t *db = (board_info_t *) ndev-priv; + unsigned long flags; if (ndev) { if (netif_running(ndev)) { + spin_lock_irqsave(db-lock, flags); dm9000_reset(db); dm9000_init_dm9000(ndev); + spin_unlock_irqrestore(db-lock, flags); netif_device_attach(ndev); } - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] [LIB]: Introduce struct pcounter
On Wed, Nov 07, 2007 at 04:16:15PM +, Arnaldo Carvalho de Melo wrote: This just generalises what was introduced by Eric Dumazet for the struct proto inuse field in 286ab3d46058840d68e5d7d52e316c1f7e98c59f: [NET]: Define infrastructure to keep 'inuse' changes in an efficent SMP/NUMA way. Please look at the comment in there to see the rationale. Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED] Both patches applied. Thanks Arnaldo! This patch had some trailing white spaces picked up by git which I've fixed. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] smc911x: Fix unused variable warning.
The smc911x_local pointer in smc911x_rcv is only used in the SMC_USE_DMA case. Move it under the #ifdef so GCC doesn't generate a warning in the non-DMA case. --- drivers/net/smc911x.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c index dd18af0..69a78b3 100644 --- a/drivers/net/smc911x.c +++ b/drivers/net/smc911x.c @@ -428,7 +428,6 @@ static inline void smc911x_drop_pkt(struct net_device *dev) */ static inline void smc911x_rcv(struct net_device *dev) { - struct smc911x_local *lp = netdev_priv(dev); unsigned long ioaddr = dev-base_addr; unsigned int pkt_len, status; struct sk_buff *skb; @@ -473,6 +472,7 @@ static inline void smc911x_rcv(struct net_device *dev) skb_put(skb,pkt_len-4); #ifdef SMC_USE_DMA { + struct smc911x_local *lp = netdev_priv(dev); unsigned int fifo; /* Lower the FIFO threshold if possible */ fifo = SMC_GET_FIFO_INT(); -- 1.5.3.4 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] smc911x: Fix undefined CONFIG_ symbol warning
elif defined(CONFIG_*) should be used instead of elif CONFIG_* so GCC doesn't give warnings about undefined symbols when the config option is disabled. --- drivers/net/smc911x.h |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/net/smc911x.h b/drivers/net/smc911x.h index 16a0edc..d04e4fa 100644 --- a/drivers/net/smc911x.h +++ b/drivers/net/smc911x.h @@ -37,7 +37,7 @@ #define SMC_USE_16BIT0 #define SMC_USE_32BIT1 #define SMC_IRQ_SENSEIRQF_TRIGGER_FALLING -#elif CONFIG_SH_MAGIC_PANEL_R2 +#elif defined(CONFIG_SH_MAGIC_PANEL_R2) #define SMC_USE_SH_DMA 0 #define SMC_USE_16BIT0 #define SMC_USE_32BIT1 -- 1.5.3.4 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] smc911x: Fix undefined CONFIG_ symbol warning
elif defined(CONFIG_*) should be used instead of elif CONFIG_* so GCC doesn't give warnings about undefined symbols when the config option is disabled. Signed-off-by: Peter Korsgaard [EMAIL PROTECTED] --- Sigh, forgot --signoff :/ drivers/net/smc911x.h |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/net/smc911x.h b/drivers/net/smc911x.h index 16a0edc..d04e4fa 100644 --- a/drivers/net/smc911x.h +++ b/drivers/net/smc911x.h @@ -37,7 +37,7 @@ #define SMC_USE_16BIT0 #define SMC_USE_32BIT1 #define SMC_IRQ_SENSEIRQF_TRIGGER_FALLING -#elif CONFIG_SH_MAGIC_PANEL_R2 +#elif defined(CONFIG_SH_MAGIC_PANEL_R2) #define SMC_USE_SH_DMA 0 #define SMC_USE_16BIT0 #define SMC_USE_32BIT1 -- 1.5.3.4 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] smc911x: Fix unused variable warning.
The smc911x_local pointer in smc911x_rcv is only used in the SMC_USE_DMA case. Move it under the #ifdef so GCC doesn't generate a warning in the non-DMA case. Signed-off-by: Peter Korsgaard [EMAIL PROTECTED] --- drivers/net/smc911x.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c index dd18af0..69a78b3 100644 --- a/drivers/net/smc911x.c +++ b/drivers/net/smc911x.c @@ -428,7 +428,6 @@ static inline void smc911x_drop_pkt(struct net_device *dev) */ static inline void smc911x_rcv(struct net_device *dev) { - struct smc911x_local *lp = netdev_priv(dev); unsigned long ioaddr = dev-base_addr; unsigned int pkt_len, status; struct sk_buff *skb; @@ -473,6 +472,7 @@ static inline void smc911x_rcv(struct net_device *dev) skb_put(skb,pkt_len-4); #ifdef SMC_USE_DMA { + struct smc911x_local *lp = netdev_priv(dev); unsigned int fifo; /* Lower the FIFO threshold if possible */ fifo = SMC_GET_FIFO_INT(); -- 1.5.3.4 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: HTB/HSFC shaping precision
On Wed, 2007-21-11 at 12:31 +0200, Denys Fedoryshchenko wrote: On Wed, 21 Nov 2007 10:47:10 +0100, Jarek Poplawski wrote But, if you have full control on your side, it looks like a kind of realtime traffic, and then HFSC should be more appropriate for this (but I only 'heard' about this). One message later, that's what i dreamed about :-) Subject: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module On website they have very good explanation... http://www.gridmpi.org/gridtcp.jsp That looks interesting - without reading the papers a few questions are developing in my brain cells; for example it looks very similar to what the chelsio NICs claim to do (which could be a good thing for TCP). Whenever i see someone implementing something in hardware, i always get flushes of patents. Denys, one of the things i have noticed with iperf is it tries to be clever and probe the available bandwidth first. So you may not get the most optimal use of your bandwidth. Try something like pktgen, it's quite accurate in its measurements. Just add a tc drop rule on the receiver to get the accounting. cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module
On Wed, 2007-21-11 at 19:18 +0900, Ryousei Takano wrote: This patch includes the PSPacer (Precise Software Pacer) qdisc module, which achieves precise transmission bandwidth control. You can find more information at the project web page (http://www.gridmpi.org/gridtcp.jsp). Good stuff. I have not read your paper - There are NICs out there (chelsio comes to mind) which claim to do pacing and have shown impressive numbers with TCP. Is your approach similar? Are there patents involved by some of these hardware vendors? (It would not be surprising if they exist). The advantage with NICs is they have very good control of the timing (clock granularity being extremely important in cases like this) - what were your measurements based on i.e what clock source did you use on Linux? Also, the idea of using a PAUSE frame to add gaps is interesting, but you should note that in linux a qdisc may be attached to any network device and this for example may be a PPP device etc. What would you use for gaps in that case? I apologize if the answers are in your papers - i just glossed over. cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/8] ibm_newemac: Fix possible lockup on close
On Wed, Nov 21, 2007 at 05:06:39PM +1100, Benjamin Herrenschmidt wrote: It's a bad idea to call flush_scheduled_work from within a netdev-stop because the linkwatch will occasionally take the rtnl lock from a workqueue context, and thus that can deadlock. This reworks things a bit in that area to avoid the problem. So from the name of the driver you want to keep the previous emac driver around. Is there a good reason for that? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/8] ibm_newemac: Fix possible lockup on close
On Wed, 21 Nov 2007 16:41:23 +0100 Christoph Hellwig [EMAIL PROTECTED] wrote: On Wed, Nov 21, 2007 at 05:06:39PM +1100, Benjamin Herrenschmidt wrote: It's a bad idea to call flush_scheduled_work from within a netdev-stop because the linkwatch will occasionally take the rtnl lock from a workqueue context, and thus that can deadlock. This reworks things a bit in that area to avoid the problem. So from the name of the driver you want to keep the previous emac driver around. Is there a good reason for that? It's being kept around until arch/ppc dies. Then things should get renamed. josh - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv6 0/3] Interface group patches
On Wed, 2007-11-21 at 01:25 +0100, Patrick McHardy wrote: David Miller wrote: From: Laszlo Attila Toth [EMAIL PROTECTED] Date: Tue, 20 Nov 2007 14:52:12 +0100 Jan Engelhardt írta: On Nov 20 2007 14:14, Laszlo Attila Toth wrote: This is the 6th version of our interface group patches. The interface group value can be used to manage different interfaces at the same time such as in netfilter/iptables. I take it you could not use...? iptables -i iif1 -j dosomething iptables -i iif2 -j dosomething This kind of usage requires static interface names. But there are dynamic interfaces such as ppp, where the actual name is not always known or sometimes they exist sometimes not. It is difficult to use iptables this way, and every ifup/ifdown requires change in the iptables ruleset (donwload it, modify and upload to the kernel). It may be too slow. This is actually not true these days. When network devices are created user events are generated and the user can rename the device however they like using a mapping table of any kind. And at such point the problem you present doesn't actually exist, you can know what the device will be named. And if rule loading dynamically is slow, we should fix that instead of creating infrastructure and interfaces we don't actually need. I actually like this feature. Matching on names in iptables has always been one of the major bottlenecks, taking (according to my last measurement, which is some time ago) about 1-2% of the total performance. This is of course in large parts because the interface match is present on *every* rule, but still some way to logically group interfaces seems useful to me, not only for iptables, but also for routing rules, traffic classifiers, af_packet sockets etc. I'm working on the incremental ruleset changing API BTW :) One of the changes will be that interface matching is not a default part of every rule, and without wildcards it will use the ifindex. 
But since the cost of this feature seems pretty low, I don't see a compelling reason against it. We are also using interface groups from userspace applications (hence the netlink notification). ppp comes up, an interface is created according to the pppd configuration, which then assigns the interface to the given group. another application (a proxy based firewall in our example) listens to this notification and binds to the new interface as well. -- Bazsi - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module
Ryousei Takano a écrit : This patch includes the PSPacer (Precise Software Pacer) qdisc module, which achieves precise transmission bandwidth control. You can find more information at the project web page (http://www.gridmpi.org/gridtcp.jsp). Signed-off-by: Ryousei Takano [EMAIL PROTECTED] +static struct sk_buff *alloc_gap_packet(struct Qdisc* sch, int size) +{ + struct sk_buff *skb; + struct net_device *dev = sch-dev; + unsigned char *pkt; + int pause_time = 0; + int pktsize = size + 2; + + skb = alloc_skb(pktsize, GFP_ATOMIC); + if (!skb) + return NULL; + + skb_reserve(skb, 2); minor nit, but skb_reserve is not *needed* here. skb_reserve() is used to align IP header on a 16 bytes boundary, and we do it on rx side to speedup IP stack, at the cost of a possibly more expensive DMA transfert. Here you dont send an IP packet, do you ? + + pkt = skb-data; + memset(pkt, 0xff, pktsize); + pkt[0] = 0x01; /* dst address: 01:80:c2:00:00:01 */ + pkt[1] = 0x80; + pkt[2] = 0xc2; + pkt[3] = 0x00; + pkt[4] = 0x00; + pkt[5] = 0x01; + memcpy(pkt + 6, dev-dev_addr, ETH_ALEN /* dev-addr_len */); + + pkt[12] = 0x88; /* MAC control:88 08 */ + pkt[13] = 0x08; + pkt[14] = 0;/* MAC control opcode: 00 01 */ + pkt[15] = 1; + pkt[16] = pause_time 8; + pkt[17] = pause_time; + + skb_put(skb, size); + + skb-dev = sch-dev; - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH 0/2]: TCP MTUprobe fixes
Hi all, Here are two other things in MTU probe code that caught my attention while attempting to figure out the sk_send_head usage there (sent patch to that earlier). The latter here is not strictly speaking a fix but the original code has striking complexity to perform a query which can be reduced to a simple operation, thus I included it here as well. If these seem fine to you as well, inclusion net-2.6 would be nice. Only compile tested. -- i. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH 2/2] [TCP] MTUprobe: Cleanup send queue check (no need to loop)
The original code has striking complexity to perform a query which can be reduced to a very simple compare. FIN seqno may be included to write_seq but it should not make any significant difference here compared to skb-len which was used previously. One won't end up there with SYN still queued. Use of write_seq check guarantees that there's a valid skb in send_head so I removed the extra check. Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- net/ipv4/tcp_output.c |7 +-- 1 files changed, 1 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ff22ce8..1822ce6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1315,12 +1315,7 @@ static int tcp_mtu_probe(struct sock *sk) } /* Have enough data in the send queue to probe? */ - len = 0; - if ((skb = tcp_send_head(sk)) == NULL) - return -1; - while ((len += skb-len) size_needed !tcp_skb_is_last(sk, skb)) - skb = tcp_write_queue_next(sk, skb); - if (len size_needed) + if (tp-write_seq - tp-snd_nxt size_needed) return -1; if (tp-snd_wnd size_needed) -- 1.5.0.6 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH 1/2] [TCP]: MTUprobe: receiver window data available checks fixed
It seems that the checked range for receiver window check should begin from the first rather than from the last skb that is going to be included to the probe. And that can be achieved without reference to skbs at all, snd_nxt and write_seq provides the correct seqno already. Plus, it SHOULD account packets that are necessary to trigger fast retransmit [RFC4821]. Location of snd_wnd probe_size/size_needed check is bogus because it will cause the other if() match as well (due to snd_nxt = snd_una invariant). Removed dead obvious comment. Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] --- net/ipv4/tcp_output.c | 17 - 1 files changed, 8 insertions(+), 9 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 30d6737..ff22ce8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1289,6 +1289,7 @@ static int tcp_mtu_probe(struct sock *sk) struct sk_buff *skb, *nskb, *next; int len; int probe_size; + int size_needed; unsigned int pif; int copy; int mss_now; @@ -1307,6 +1308,7 @@ static int tcp_mtu_probe(struct sock *sk) /* Very simple search strategy: just double the MSS. */ mss_now = tcp_current_mss(sk, 0); probe_size = 2*tp-mss_cache; + size_needed = probe_size + (tp-reordering + 1) * mss_now; if (probe_size tcp_mtu_to_mss(sk, icsk-icsk_mtup.search_high)) { /* TODO: set timer for probe_converge_event */ return -1; @@ -1316,18 +1318,15 @@ static int tcp_mtu_probe(struct sock *sk) len = 0; if ((skb = tcp_send_head(sk)) == NULL) return -1; - while ((len += skb-len) probe_size !tcp_skb_is_last(sk, skb)) + while ((len += skb-len) size_needed !tcp_skb_is_last(sk, skb)) skb = tcp_write_queue_next(sk, skb); - if (len probe_size) + if (len size_needed) return -1; - /* Receive window check. 
*/ - if (after(TCP_SKB_CB(skb)-seq + probe_size, tp-snd_una + tp-snd_wnd)) { - if (tp-snd_wnd probe_size) - return -1; - else - return 0; - } + if (tp-snd_wnd size_needed) + return -1; + if (after(tp-snd_nxt + size_needed, tp-snd_una + tp-snd_wnd)) + return 0; /* Do we need to wait to drain cwnd? */ pif = tcp_packets_in_flight(tp); -- 1.5.0.6 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH][IRDA] Compilation for CONFIG_INET=n case
Found this occasionally. The CONFIG_INET=n is hardly ever set, but if it is the irlan_eth_send_gratuitous_arp() compilation should produce a warning about unused variable in_dev. Too pedantic? :) Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] --- diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 7f9c854..c682207 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -296,6 +296,7 @@ void irlan_eth_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) */ void irlan_eth_send_gratuitous_arp(struct net_device *dev) { +#ifdef CONFIG_INET struct in_device *in_dev; /* @@ -303,7 +304,6 @@ void irlan_eth_send_gratuitous_arp(struct net_device *dev) * is useful if we have changed access points on the same * subnet. */ -#ifdef CONFIG_INET IRDA_DEBUG(4, IrLAN: Sending gratuitous ARP\n); rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 2/2] [TCP] MTUprobe: Cleanup send queue check (no need to loop)
Ilpo Järvinen wrote: The original code has striking complexity to perform a query which can be reduced to a very simple compare. FIN seqno may be included to write_seq but it should not make any significant difference here compared to skb-len which was used previously. One won't end up there with SYN still queued. Use of write_seq check guarantees that there's a valid skb in send_head so I removed the extra check. Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] Acked-by: John Heffner [EMAIL PROTECTED] --- net/ipv4/tcp_output.c |7 +-- 1 files changed, 1 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ff22ce8..1822ce6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1315,12 +1315,7 @@ static int tcp_mtu_probe(struct sock *sk) } /* Have enough data in the send queue to probe? */ - len = 0; - if ((skb = tcp_send_head(sk)) == NULL) - return -1; - while ((len += skb-len) size_needed !tcp_skb_is_last(sk, skb)) - skb = tcp_write_queue_next(sk, skb); - if (len size_needed) + if (tp-write_seq - tp-snd_nxt size_needed) return -1; if (tp-snd_wnd size_needed) - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 1/2] [TCP]: MTUprobe: receiver window data available checks fixed
Ilpo Järvinen wrote: It seems that the checked range for receiver window check should begin from the first rather than from the last skb that is going to be included to the probe. And that can be achieved without reference to skbs at all, snd_nxt and write_seq provides the correct seqno already. Plus, it SHOULD account packets that are necessary to trigger fast retransmit [RFC4821]. Location of snd_wnd probe_size/size_needed check is bogus because it will cause the other if() match as well (due to snd_nxt = snd_una invariant). Removed dead obvious comment. Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED] Acked-by: John Heffner [EMAIL PROTECTED] --- net/ipv4/tcp_output.c | 17 - 1 files changed, 8 insertions(+), 9 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 30d6737..ff22ce8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1289,6 +1289,7 @@ static int tcp_mtu_probe(struct sock *sk) struct sk_buff *skb, *nskb, *next; int len; int probe_size; + int size_needed; unsigned int pif; int copy; int mss_now; @@ -1307,6 +1308,7 @@ static int tcp_mtu_probe(struct sock *sk) /* Very simple search strategy: just double the MSS. */ mss_now = tcp_current_mss(sk, 0); probe_size = 2*tp-mss_cache; + size_needed = probe_size + (tp-reordering + 1) * mss_now; if (probe_size tcp_mtu_to_mss(sk, icsk-icsk_mtup.search_high)) { /* TODO: set timer for probe_converge_event */ return -1; @@ -1316,18 +1318,15 @@ static int tcp_mtu_probe(struct sock *sk) len = 0; if ((skb = tcp_send_head(sk)) == NULL) return -1; - while ((len += skb-len) probe_size !tcp_skb_is_last(sk, skb)) + while ((len += skb-len) size_needed !tcp_skb_is_last(sk, skb)) skb = tcp_write_queue_next(sk, skb); - if (len probe_size) + if (len size_needed) return -1; - /* Receive window check. 
*/ - if (after(TCP_SKB_CB(skb)-seq + probe_size, tp-snd_una + tp-snd_wnd)) { - if (tp-snd_wnd probe_size) - return -1; - else - return 0; - } + if (tp-snd_wnd size_needed) + return -1; + if (after(tp-snd_nxt + size_needed, tp-snd_una + tp-snd_wnd)) + return 0; /* Do we need to wait to drain cwnd? */ pif = tcp_packets_in_flight(tp); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2][2.6.24] ehea: Reworked rcv queue handling to log only fatal errors
Prevent driver from brawly logging packet checksum errors. Signed-off-by: Thomas Klein [EMAIL PROTECTED] --- drivers/net/ehea/ehea.h |2 +- drivers/net/ehea/ehea_main.c | 11 +-- drivers/net/ehea/ehea_qmr.h |4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h index 5935899..ea67615 100644 --- a/drivers/net/ehea/ehea.h +++ b/drivers/net/ehea/ehea.h @@ -40,7 +40,7 @@ #include asm/io.h #define DRV_NAME ehea -#define DRV_VERSIONEHEA_0082 +#define DRV_VERSIONEHEA_0083 /* eHEA capability flags */ #define DLPAR_PORT_ADD_REM 1 diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index d2f715d..869e160 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -410,11 +410,6 @@ static int ehea_treat_poll_error(struct ehea_port_res *pr, int rq, if (cqe-status EHEA_CQE_STAT_ERR_CRC) pr-p_stats.err_frame_crc++; - if (netif_msg_rx_err(pr-port)) { - ehea_error(CQE Error for QP %d, pr-qp-init_attr.qp_nr); - ehea_dump(cqe, sizeof(*cqe), CQE); - } - if (rq == 2) { *processed_rq2 += 1; skb = get_skb_by_index(pr-rq2_skba.arr, pr-rq2_skba.len, cqe); @@ -426,7 +421,11 @@ static int ehea_treat_poll_error(struct ehea_port_res *pr, int rq, } if (cqe-status EHEA_CQE_STAT_FAT_ERR_MASK) { - ehea_error(Critical receive error. Resetting port.); + if (netif_msg_rx_err(pr-port)) { + ehea_error(Critical receive error for QP %d. 
+ Resetting port., pr-qp-init_attr.qp_nr); + ehea_dump(cqe, sizeof(*cqe), CQE); + } schedule_work(pr-port-reset_task); return 1; } diff --git a/drivers/net/ehea/ehea_qmr.h b/drivers/net/ehea/ehea_qmr.h index 562de0e..bc62d38 100644 --- a/drivers/net/ehea/ehea_qmr.h +++ b/drivers/net/ehea/ehea_qmr.h @@ -145,8 +145,8 @@ struct ehea_rwqe { #define EHEA_CQE_VLAN_TAG_XTRACT 0x0400 #define EHEA_CQE_TYPE_RQ 0x60 -#define EHEA_CQE_STAT_ERR_MASK 0x720F -#define EHEA_CQE_STAT_FAT_ERR_MASK 0x1F +#define EHEA_CQE_STAT_ERR_MASK 0x700F +#define EHEA_CQE_STAT_FAT_ERR_MASK 0xF #define EHEA_CQE_STAT_ERR_TCP 0x4000 #define EHEA_CQE_STAT_ERR_IP 0x2000 #define EHEA_CQE_STAT_ERR_CRC 0x1000 -- 1.5.2 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2][2.6.24] ehea: Improve tx packets counting
Using own tx_packets counter instead of firmware counters. Signed-off-by: Thomas Klein [EMAIL PROTECTED] --- drivers/net/ehea/ehea.h |2 +- drivers/net/ehea/ehea_main.c |9 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h index f78e5bf..5935899 100644 --- a/drivers/net/ehea/ehea.h +++ b/drivers/net/ehea/ehea.h @@ -40,7 +40,7 @@ #include asm/io.h #define DRV_NAME ehea -#define DRV_VERSIONEHEA_0080 +#define DRV_VERSIONEHEA_0082 /* eHEA capability flags */ #define DLPAR_PORT_ADD_REM 1 diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index f0319f1..d2f715d 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -136,7 +136,7 @@ static struct net_device_stats *ehea_get_stats(struct net_device *dev) struct ehea_port *port = netdev_priv(dev); struct net_device_stats *stats = port-stats; struct hcp_ehea_port_cb2 *cb2; - u64 hret, rx_packets; + u64 hret, rx_packets, tx_packets; int i; memset(stats, 0, sizeof(*stats)); @@ -162,7 +162,11 @@ static struct net_device_stats *ehea_get_stats(struct net_device *dev) for (i = 0; i port-num_def_qps; i++) rx_packets += port-port_res[i].rx_packets; - stats-tx_packets = cb2-txucp + cb2-txmcp + cb2-txbcp; + tx_packets = 0; + for (i = 0; i port-num_def_qps + port-num_add_tx_qps; i++) + tx_packets += port-port_res[i].tx_packets; + + stats-tx_packets = tx_packets; stats-multicast = cb2-rxmcp; stats-rx_errors = cb2-rxuerr; stats-rx_bytes = cb2-rxo; @@ -2000,6 +2004,7 @@ static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) } ehea_post_swqe(pr-qp, swqe); + pr-tx_packets++; if (unlikely(atomic_read(pr-swqe_avail) = 1)) { spin_lock_irqsave(pr-netif_queue, flags); -- 1.5.2 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH,RFC] ep93xx_eth: conversion to phylib framework
On Nov 16, 2007, at 03:38, Herbert Valerio Riedel wrote: Currently, the ep93xx_eth driver doesn't care about the PHY state, but it should, in order to tell the MAC when full duplex operation is required; failure to do so causes degraded performance on full duplex links. This patch implements proper PHY handling via the phylib framework: - clean up ep93xx_mdio_{read,write} to conform to ep93xx manual - convert ep93xx_eth driver to phylib framework - set full duplex bit in configuration of MAC when FDX link detected - convert to use print_mac() Looks good to me. My only comment is that we might want to have support for checking preamble suppression support in the PHY Lib, itself. Acked-by: Andy Fleming [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
The sysfs bits related to the hba should use one of the scsi sysfs facilities or if they are related to iscsi bits and are generic then through the iscsi hba bnx2i needs 2 sysfs entries - 1. QP size info - this is used to size per connection shared data structures to issue work requests to chip (login, scsi cmd, tmf, nopin) and get completions from the chip (scsi completions, async messages, etc.). This is an iSCSI HBA attribute 2. port mapper - we can be more flexible on classifying this as either iSCSI HBA attribute or bnx2i driver global attribute Can hooks be added to iSCSI transport class to include these? Which ones were they exactly? I think JamesB wanted only common transport values in the transport class. If it is driver specific then it should go on the host or target or device with the scsi_host_template attrs. It's a chicken-and-egg issue to put port mapper sysfs entry in scsi host attributes. Application won't see sysfs unless initiator creates an iSCSI session and driver can't create an iSCSI session without a tcp port. I was wondering if there is a better way than using IOCTL in this situation? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 2/2][BNX2]: Add iSCSI support to BNX2 devices.
Anil Veerabhadrappa wrote: It's a chicken egg issue to put port mapper sysfs entry in scsi host attributes. Application won't see sysfs unless initiator creates an iSCSI session and driver can't create an iSCSI session without a tcp port. I was wondering if there is a better way than using IOCTL in this situation? Agree, and IMHO, is why the scsi_host should have been bound to the ISID or something similar (e.g. the initiator port that can have 1 or more sessions), and the session bound to the scsi_target under the scsi_host. -- james s - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH] SO_NO_CHECK for IPv6
From: Jeff Garzik [EMAIL PROTECTED] Date: Wed, 21 Nov 2007 07:45:32 -0500 SO_NO_CHECK support for IPv6 appeared to be missing. This is presented, based on a reading of net/ipv4/udp.c. I wonder if IPv4's CHECKSUM_PARTIAL check from udp_push_pending_frames() also needs to be copied to IPv6? Signed-off-by: Jeff Garzik [EMAIL PROTECTED] IPV6 specifies that, unlike ipv4, this no-checksum behavior is not allowed. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Routing tables associated with VLANs dissappear when parent ethX down/up
For consideration, this patch seems to work for me. I'm not sure why we ever listened to these events. I've only tested on a NIC that doesn't support hw-accel at the moment..will test with e1000 later. Thanks, Ben -- Ben Greear [EMAIL PROTECTED] Candela Technologies Inc http://www.candelatech.com diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index c4209c8..acbf0ff 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -615,6 +615,11 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, } break; +#if 0 + /* Don't propagate management state from base dev to VLANs. If you do this, + * then if you 'ifconfig eth0 down; ifconfig eth0 up', you also lose all the + * routes for eth0.* VLANs. --Ben + */ case NETDEV_DOWN: /* Put all VLANs for this dev in the down state too. */ for (i = 0; i VLAN_GROUP_ARRAY_LEN; i++) { @@ -644,6 +649,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, dev_change_flags(vlandev, flgs | IFF_UP); } break; +#endif case NETDEV_UNREGISTER: /* Delete all VLANs for this dev. */
[PATCH] cassini: NAPI configuration
The Cassini driver has NAPI support, but it is not possible to configure it. Compile tested only, no idea if it works (no hardware). Get rid of warning from leftover variable in now visible code. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- a/drivers/net/Kconfig 2007-11-19 18:56:12.0 -0800 +++ b/drivers/net/Kconfig 2007-11-21 11:28:20.0 -0800 @@ -587,6 +587,15 @@ config CASSINI Support for the Sun Cassini chip, aka Sun GigaSwift Ethernet. See also http://www.sun.com/products-n-solutions/hardware/docs/pdf/817-4341-10.pdf +config CASSINI_NAPI + bool Use Rx Polling (NAPI) (EXPERIMENTAL) + depends on CASSINI EXPERIMENTAL + help + NAPI is a new driver API designed to reduce CPU and interrupt load + when the driver is receiving lots of packets from the card. + + If in doubt, say N. + config SUNVNET tristate Sun Virtual Network support depends on SUN_LDOMS --- a/drivers/net/cassini.c 2007-11-16 16:17:20.0 -0800 +++ b/drivers/net/cassini.c 2007-11-21 11:30:43.0 -0800 @@ -2611,7 +2611,7 @@ static int cas_poll(struct napi_struct * { struct cas *cp = container_of(napi, struct cas, napi); struct net_device *dev = cp-dev; - int i, enable_intr, todo, credits; + int i, enable_intr, credits; u32 status = readl(cp-regs + REG_INTR_STATUS); unsigned long flags; - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/8] ibm_newemac: Fix possible lockup on close
On Wed, 2007-11-21 at 16:41 +0100, Christoph Hellwig wrote: On Wed, Nov 21, 2007 at 05:06:39PM +1100, Benjamin Herrenschmidt wrote: It's a bad idea to call flush_scheduled_work from within a netdev-stop because the linkwatch will occasionally take the rtnl lock from a workqueue context, and thus that can deadlock. This reworks things a bit in that area to avoid the problem. So from the name of the driver you want to keep the previous emac driver around. Is there a good reason for that? Until arch/ppc is gone... the previous driver works with arch/ppc the new one with arch/powerpc. If we kill arch/ppc in .25, then we'll remove the old driver and rename the new one. If not, that will wait til .26 I'm hard at work porting as much of 4xx over I can to get to the point where we -can- kill arch/ppc but I'm not done yet. Cheers, Ben. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Routing tables associated with VLANs dissappear when parent ethX down/up
On Wed, 21 Nov 2007 11:51:43 -0800 Ben Greear [EMAIL PROTECTED] wrote: For consideration, this patch seems to work for me. I'm not sure why we ever listened to these events. I've only tested on a NIC that doesn't support hw-accel at the moment..will test with e1000 later. Thanks, Ben But then if you are doing bonding or bridging of vlan's and you bring down the root network device, the upper layer is not notified (for failover). -- Stephen Hemminger [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Routing tables associated with VLANs dissappear when parent ethX down/up
Stephen Hemminger wrote: On Wed, 21 Nov 2007 11:51:43 -0800 Ben Greear [EMAIL PROTECTED] wrote: For consideration, this patch seems to work for me. I'm not sure why we ever listened to these events. I've only tested on a NIC that doesn't support hw-accel at the moment..will test with e1000 later. Thanks, Ben But then if you are doing bonding or bridging of vlan's and you bring down the root network device, the upper layer is not notified (for failover). operstate should be enough for this I guess. Ben, what does iproute show for the vlan device when the lower device is down? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Routing tables associated with VLANs dissappear when parent ethX down/up
Patrick McHardy wrote: Stephen Hemminger wrote: On Wed, 21 Nov 2007 11:51:43 -0800 Ben Greear [EMAIL PROTECTED] wrote: For consideration, this patch seems to work for me. I'm not sure why we ever listed to these events. I've only tested on a NIC that doesn't support hw-accel at the moment..will test with e1000 later. Thanks, Ben But then if you are doing bonding or bridging of vlan's and you bring down the root network device, the upper layer is not notified (for failover). operstate should be enough for this I guess. Ben, what does iproute show for the vlan device when the lower device is down? It looks like it knows, assuming M-DOWN is useful information. Eth2 is un-plugged, by the way. [EMAIL PROTECTED] ~]# ifconfig eth2 up [EMAIL PROTECTED] ~]# ip link show eth2.2 125: [EMAIL PROTECTED]: NO-CARRIER,BROADCAST,MULTICAST,UP mtu 1500 qdisc noqueue link/ether 00:03:2d:08:33:47 brd ff:ff:ff:ff:ff:ff [EMAIL PROTECTED] ~]# ifconfig eth2 down [EMAIL PROTECTED] ~]# ip link show eth2.2 125: [EMAIL PROTECTED]: NO-CARRIER,BROADCAST,MULTICAST,UP,M-DOWN mtu 1500 qdisc noqueue link/ether 00:03:2d:08:33:47 brd ff:ff:ff:ff:ff:ff [EMAIL PROTECTED] ~]# Thanks, Ben -- Ben Greear [EMAIL PROTECTED] Candela Technologies Inc http://www.candelatech.com - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Routing tables associated with VLANs dissappear when parent ethX down/up
Ben Greear wrote: Patrick McHardy wrote: Stephen Hemminger wrote: But then if you are doing bonding or bridging of vlan's and you bring down the root network device, the upper layer is not notified (for failover). operstate should be enough for this I guess. Ben, what does iproute show for the vlan device when the lower device is down? It looks like it knows, assuming M-DOWN is useful information. Eth2 is un-plugged, by the way. [EMAIL PROTECTED] ~]# ifconfig eth2 up [EMAIL PROTECTED] ~]# ip link show eth2.2 125: [EMAIL PROTECTED]: NO-CARRIER,BROADCAST,MULTICAST,UP mtu 1500 qdisc noqueue link/ether 00:03:2d:08:33:47 brd ff:ff:ff:ff:ff:ff [EMAIL PROTECTED] ~]# ifconfig eth2 down [EMAIL PROTECTED] ~]# ip link show eth2.2 125: [EMAIL PROTECTED]: NO-CARRIER,BROADCAST,MULTICAST,UP,M-DOWN mtu 1500 qdisc noqueue link/ether 00:03:2d:08:33:47 brd ff:ff:ff:ff:ff:ff [EMAIL PROTECTED] ~]# That comes from iproute itself, but the missing LOWER-UP flag indicates it and that should be enough for bridging and bonding. I'm unsure about this though since it's still a big difference in userspace visible behaviour, people might just as well manually configure failover once routing disappears or the device goes down, or just have routing fall through to different routes. All this wouldn't work anymore. Maybe we can make this optional somehow without too much ugliness? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Missing audit information in xfrm_audit_common_policyinfo()?
I just noticed that the IPsec auditing code does not appear to audit the netmask for the selector source and destination addresses in xfrm_audit_common_policyinfo(). Before I threw a patch together I thought I would check to see if there was a reason for this that I am missing ... -- paul moore linux security @ hp - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Routing tables associated with VLANs dissappear when parent ethX down/up
Patrick McHardy wrote: Ben Greear wrote: Patrick McHardy wrote: Stephen Hemminger wrote: But then if you are doing bonding or bridging of vlan's and you bring down the root network device, the upper layer is not notified (for failover). operstate should be enough for this I guess. Ben, what does iproute show for the vlan device when the lower device is down? It looks like it knows, assuming M-DOWN is useful information. Eth2 is un-plugged, by the way. [EMAIL PROTECTED] ~]# ifconfig eth2 up [EMAIL PROTECTED] ~]# ip link show eth2.2 125: [EMAIL PROTECTED]: NO-CARRIER,BROADCAST,MULTICAST,UP mtu 1500 qdisc noqueue link/ether 00:03:2d:08:33:47 brd ff:ff:ff:ff:ff:ff [EMAIL PROTECTED] ~]# ifconfig eth2 down [EMAIL PROTECTED] ~]# ip link show eth2.2 125: [EMAIL PROTECTED]: NO-CARRIER,BROADCAST,MULTICAST,UP,M-DOWN mtu 1500 qdisc noqueue link/ether 00:03:2d:08:33:47 brd ff:ff:ff:ff:ff:ff [EMAIL PROTECTED] ~]# That comes from iproute itself, but the missing LOWER-UP flag indicates it and that should be enough for bridging and bonding. I'm unsure about this though since its still a big difference in userspace visible behaviour, people might just as well manually configure failover once routing disappears or the device goes down, or just have routing fall through to different routes. All this wouldn't work anymore. Maybe we can make this optional somehow without too much uglyness? I'm fine with that..we can just add a new vlan-device flag similar to the reorder-header flag. With the current code, on 'UP' of the underlying code, all of the VLANs will also go UP, even if the user had previously put them DOWN. That seems like it could be quite dangerous/unexpected to me..but I guess it's required if we are going to automatically DOWN them... One other thought: Maybe we could tell a small lie and say that we have NO-CARRIER on the VLAN when the underlying device is down OR has no carrier? 
That way we keep normal link up/down semantics w/out having to change the admin state of the VLANs... Thanks, Ben -- Ben Greear [EMAIL PROTECTED] Candela Technologies Inc http://www.candelatech.com - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Inconsistent lock state and possible irq lock inversion dependency detected in ax25.ko
Hi, I am practicing intensively AX25 packet radio that uses ax25.ko together with mkiss, crc16, netrom, and rose modules using two PIII CPU Linux machines with 2.6.23.8 kernel. On the first Linux machine I did not validate kernel hacking and AX25 applications are running 100% of the time without serious problems. On the second machine I validated kernel hacking and sooner or later I get exactly the same message after a connect timeout expires : [ INFO: inconsistent lock state ] The error seems to reside around ax25_disconnect+0x46/0xaf [ax25] that is called when an AX25 connect timeout or a connection failure occurs. Connect timeout is probably activating ax25_std_heartbeat_expiry+0x19/0xd3 [ax25] The message is only displayed once on a boot session. Ralf Baechle explained to me that ax25 code is very buggy and spinlocks difficult to trace. However, as the cause of error is clearly identified and the reported address is constant I suspect that an experienced programmer (which I am not) could trace the problem. Moreover, I had the opportunity to catch a different message, that was longer than usual, and seem more explicit : [ INFO: possible irq lock inversion dependency detected ] Although the symptom is different it is related to the same origin : fpac/4933 just changed the state of lock: (slock-AF_AX25){--..}, at: [d8be3312] ax25_disconnect+0x46/0xaf [ax25] Whatever the running application is, the inconsistent lock state could be observed with ax25_call, flexd, fpac ax25 application programs. Please find attached a few reports captured from dmesg after each event. Could someone look at the listing and identify the origin of the problem, if unique? Thanks. 
Bernard Pidoux = [ INFO: possible irq lock inversion dependency detected ] 2.6.23.1 #1 - fpac/4933 just changed the state of lock: (slock-AF_AX25){--..}, at: [d8be3312] ax25_disconnect+0x46/0xaf [ax25] but this lock was taken by another, soft-irq-safe lock in the past: (ax25_list_lock){-+..} and interrupts could create inverse lock ordering between them. other info that might help us debug this: no locks held by fpac/4933. the first lock's dependencies: - (slock-AF_AX25){--..} ops: 410 { initial-use at: [c012f448] mark_lock+0x5b/0x44b [c0130358] __lock_acquire+0x4c2/0xc02 [c0130b06] lock_acquire+0x6e/0x87 [c024886b] lock_sock_nested+0x26/0xcc [c02a37fb] _spin_lock_bh+0x2e/0x39 [c024886b] lock_sock_nested+0x26/0xcc [c024886b] lock_sock_nested+0x26/0xcc [c02462e0] sock_fasync+0x61/0x116 [c024727f] sock_close+0x22/0x2f [c015c78b] __fput+0xbc/0x172 [c015a256] filp_close+0x51/0x58 [c0119daf] put_files_struct+0x5e/0xa6 [c011ae59] do_exit+0x22e/0x6d9 [c0103cc6] sysenter_past_esp+0x8f/0x99 [c012fa5e] trace_hardirqs_on+0x11f/0x148 [c011b36f] sys_exit_group+0x0/0xd [c0103c96] sysenter_past_esp+0x5f/0x99 [] 0x softirq-on-W at: [c0130a50] __lock_acquire+0xbba/0xc02 [c0130343] __lock_acquire+0x4ad/0xc02 [c011c8e7] local_bh_enable_ip+0xbd/0xc5 [c0130b06] lock_acquire+0x6e/0x87 [d8be3312] ax25_disconnect+0x46/0xaf [ax25] [c02a37c2] _spin_lock+0x29/0x34 [d8be3312] ax25_disconnect+0x46/0xaf [ax25] [d8be3312] ax25_disconnect+0x46/0xaf [ax25] [d8be50c0] ax25_release+0x9d/0x182 [ax25] [c0246e79] sock_release+0x14/0x56 [c0247287] sock_close+0x2a/0x2f [c015c78b] __fput+0xbc/0x172 [c015a256] filp_close+0x51/0x58 [c015b284] sys_close+0x66/0x9d [c0103c96] sysenter_past_esp+0x5f/0x99 [] 0x hardirq-on-W at: [c012f448] mark_lock+0x5b/0x44b [c013031e] __lock_acquire+0x488/0xc02 [c0130b06] lock_acquire+0x6e/0x87 [c024886b] lock_sock_nested+0x26/0xcc [c02a37fb] _spin_lock_bh+0x2e/0x39 [c024886b] lock_sock_nested+0x26/0xcc [c024886b] lock_sock_nested+0x26/0xcc [c02462e0] sock_fasync+0x61/0x116 
[c024727f]
Re: BUG: skge ethernet breakage (PCI: Unable to reserve mem region)
On Wed, 19 Sep 2007 22:57:49 +0200 Jan Gukelberger [EMAIL PROTECTED] wrote: Hi, seems as if there are currently no more ideas? So shall I perhaps open a bug in Kernel Bugzilla? Thanks, Jan On Tue, 2007-09-11 at 15:39 +0200, Jan Gukelberger wrote: On Tue, 2007-09-11 at 14:37 +0200, Stephen Hemminger wrote: On Tue, 11 Sep 2007 12:58:24 +0200 Jan Gukelberger [EMAIL PROTECTED] wrote: On Tue, 2007-09-11 at 10:21 +0200, Stephen Hemminger wrote: On Fri, 07 Sep 2007 18:42:35 +0200 Jan Gukelberger [EMAIL PROTECTED] wrote: [...] The key problem seem to be the following lines in dmesg: ACPI: PCI Interrupt :04:04.0[A] - GSI 19 (level, low) - IRQ 19 PCI: Unable to reserve mem region #1:[EMAIL PROTECTED] for device :04:04.0 skge :04:04.0: cannot obtain PCI resources ACPI: PCI interrupt for device :04:04.0 disabled skge: probe of :04:04.0 failed with error -16 There is some kind of device conflict, please provide lspci -vvvxx output. I'm attaching the output of 'lspci -vvvxx' on the working 2.6.20 kernel as well as the output of 'lspci -vvxxx' on 2.6.23-rc5 which I recorded earlier. I you specifically need 'lspci -vvvxx' on 2.6.23-rc5 please drop me a note and I'll reboot quickly. Thanks, Jan All looks in order, on the PCI tables. There is a firewire control just above the skge device, perhaps you enabled one of the firewire stacks in the configuration? I did a quick diff of the respective kernel .config's (this is the configuration you mean, right?) and haven't found any notable differences in the firewire options. Perhaps the console (dmesg) output will show some clue. I'm attaching a diff between dmesg of a working and a non-working boot. You can find the full dmesg records in my first mail and in the Debian BTS respectively. 
The only thing I can see there is the old kernel having some problems with the SATA controller - even though I did never notice any unusual behaviour apart from these messages: PCI: Device :02:00.0 not available because of resource collisions ahci: probe of :02:00.0 failed with error -22 JMB363: IDE controller at PCI slot :02:00.0 PCI: Device :02:00.0 not available because of resource collisions ACPI: PCI Interrupt :02:00.0[A] - GSI 16 (level, low) - IRQ 16 JMB363: BIOS configuration fixed. Don't know whether this could be related? Thanks, Jan The problem is in the tables (ACPI) from the BIOS. So ACPI driver (and/or BIOS) have to work out the resource assignments, the driver really has nothing to do with it. -- Stephen Hemminger [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Routing tables associated with VLANs dissappear when parent ethX down/up
Ben Greear wrote: Patrick McHardy wrote: That comes from iproute itself, but the missing LOWER-UP flag indicates it and that should be enough for bridging and bonding. I'm unsure about this though since it's still a big difference in userspace visible behaviour, people might just as well manually configure failover once routing disappears or the device goes down, or just have routing fall through to different routes. All this wouldn't work anymore. Maybe we can make this optional somehow without too much ugliness? I'm fine with that..we can just add a new vlan-device flag similar to the reorder-header flag. An alternative to this would be something like Julian Anastasov's static routes patch. Not sure if it has ever been considered for merging, but it's a cleaner way than doing per-device hacks. http://www.ssi.bg/~ja/ With the current code, on 'UP' of the underlying code, all of the VLANs will also go UP, even if the user had previously put them DOWN. That seems like it could be quite dangerous/unexpected to me..but I guess it's required if we are going to automatically DOWN them... Yeah, I too never liked this behaviour. One other thought: Maybe we could tell a small lie and say that we have NO-CARRIER on the VLAN when the underlying device is down OR has no carrier? That way we keep normal link up/down semantics w/out having to change the admin state of the VLANs... That's pretty much what the operstate is doing, it should go to IF_OPER_LOWERLAYERDOWN when the lower device is down. But as I said above, people could actually rely on routes disappearing. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] sky2: disable rx checksum on Yukon XL
The Marvell Yukon XL chipset appears to have a hardware glitch where it will repeat the checksum of the last packet. Of course, this is timing sensitive and only happens sometimes... More info: http://bugzilla.kernel.org/show_bug.cgi?id=9381 As a workaround just disable hardware checksumming by default on this chip version. The earlier workaround for PCIX, dual port was also on Yukon XL so don't need to disable checksumming there. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- a/drivers/net/sky2.c2007-11-21 13:53:33.0 -0800 +++ b/drivers/net/sky2.c2007-11-21 13:59:09.0 -0800 @@ -1320,15 +1320,11 @@ static int sky2_up(struct net_device *de */ if (otherdev netif_running(otherdev) (cap = pci_find_capability(hw-pdev, PCI_CAP_ID_PCIX))) { - struct sky2_port *osky2 = netdev_priv(otherdev); u16 cmd; pci_read_config_word(hw-pdev, cap + PCI_X_CMD, cmd); cmd = ~PCI_X_CMD_MAX_SPLIT; pci_write_config_word(hw-pdev, cap + PCI_X_CMD, cmd); - - sky2-rx_csum = 0; - osky2-rx_csum = 0; } if (netif_msg_ifup(sky2)) @@ -4013,7 +4009,7 @@ static __devinit struct net_device *sky2 sky2-duplex = -1; sky2-speed = -1; sky2-advertising = sky2_supported_modes(hw); - sky2-rx_csum = 1; + sky2-rx_csum = (hw-chip_id != CHIP_ID_YUKON_XL); sky2-wol = wol; spin_lock_init(sky2-phy_lock); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 3/4] tlan list is subscribers-only
From: Gabriel C [EMAIL PROTECTED] Your mail to 'Tlan-devel' with the subject drivers/net/tlan question Is being held until the list moderator can review it for approval. The reason it is being held: Post by non-member to a members-only list Signed-off-by: Gabriel Craciunescu [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- MAINTAINERS |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff -puN MAINTAINERS~tlan-list-is-subscribers-only MAINTAINERS --- a/MAINTAINERS~tlan-list-is-subscribers-only +++ a/MAINTAINERS @@ -3733,7 +3733,7 @@ S:Maintained TLAN NETWORK DRIVER P: Samuel Chessman M: [EMAIL PROTECTED] -L: [EMAIL PROTECTED] +L: [EMAIL PROTECTED] (subscribers-only) W: http://sourceforge.net/projects/tlan/ S: Maintained _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 4/4] Net: sunrpc, remove SPIN_LOCK_UNLOCKED
From: Jiri Slaby [EMAIL PROTECTED] sunrpc, remove SPIN_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED is deprecated, use DEFINE_SPINLOCK instead Signed-off-by: Jiri Slaby [EMAIL PROTECTED] Cc: David S. Miller [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- net/sunrpc/xprt.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff -puN net/sunrpc/xprt.c~net-sunrpc-remove-spin_lock_unlocked net/sunrpc/xprt.c --- a/net/sunrpc/xprt.c~net-sunrpc-remove-spin_lock_unlocked +++ a/net/sunrpc/xprt.c @@ -62,7 +62,7 @@ static inline voiddo_xprt_reserve(struc static voidxprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); -static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(xprt_list_lock); static LIST_HEAD(xprt_list); /* _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 1/4] pfkey: sending an SADB_GET responds with an SADB_GET
From: Charles Hardin [EMAIL PROTECTED] Kernel needs to respond to an SADB_GET with the same message type to conform to the RFC 2367 Section 3.1.5 Cc: David S. Miller [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- net/key/af_key.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff -puN net/key/af_key.c~pfkey-sending-an-sadb_get-responds-with-an-sadb_get net/key/af_key.c --- a/net/key/af_key.c~pfkey-sending-an-sadb_get-responds-with-an-sadb_get +++ a/net/key/af_key.c @@ -1552,7 +1552,7 @@ static int pfkey_get(struct sock *sk, st out_hdr = (struct sadb_msg *) out_skb-data; out_hdr-sadb_msg_version = hdr-sadb_msg_version; - out_hdr-sadb_msg_type = SADB_DUMP; + out_hdr-sadb_msg_type = SADB_GET; out_hdr-sadb_msg_satype = pfkey_proto2satype(proto); out_hdr-sadb_msg_errno = 0; out_hdr-sadb_msg_reserved = 0; _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 2/4] make sunrpc/xprtsock.c:xs_setup_{udp,tcp}() static
From: Adrian Bunk [EMAIL PROTECTED] xs_setup_{udp,tcp}() can now become static. Signed-off-by: Adrian Bunk [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- include/linux/sunrpc/xprtsock.h |6 -- net/sunrpc/xprtsock.c |4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff -puN include/linux/sunrpc/xprtsock.h~make-sunrpc-xprtsockcxs_setup_udptcp-static include/linux/sunrpc/xprtsock.h --- a/include/linux/sunrpc/xprtsock.h~make-sunrpc-xprtsockcxs_setup_udptcp-static +++ a/include/linux/sunrpc/xprtsock.h @@ -9,12 +9,6 @@ #ifdef __KERNEL__ -/* - * Socket transport setup operations - */ -struct rpc_xprt *xs_setup_udp(struct xprt_create *args); -struct rpc_xprt *xs_setup_tcp(struct xprt_create *args); - intinit_socket_xprt(void); void cleanup_socket_xprt(void); diff -puN net/sunrpc/xprtsock.c~make-sunrpc-xprtsockcxs_setup_udptcp-static net/sunrpc/xprtsock.c --- a/net/sunrpc/xprtsock.c~make-sunrpc-xprtsockcxs_setup_udptcp-static +++ a/net/sunrpc/xprtsock.c @@ -1828,7 +1828,7 @@ static struct rpc_xprt *xs_setup_xprt(st * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_udp(struct xprt_create *args) +static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) { struct sockaddr *addr = args-dstaddr; struct rpc_xprt *xprt; @@ -1894,7 +1894,7 @@ struct rpc_xprt *xs_setup_udp(struct xpr * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) +static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) { struct sockaddr *addr = args-dstaddr; struct rpc_xprt *xprt; _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 5/8] pcmcia net: use roundup_pow_of_two() macro instead of grotesque loop
From: Robert P. J. Day [EMAIL PROTECTED] Signed-off-by: Robert P. J. Day [EMAIL PROTECTED] Cc: Jeff Garzik [EMAIL PROTECTED] Cc: Dominik Brodowski [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- drivers/net/pcmcia/pcnet_cs.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff -puN drivers/net/pcmcia/pcnet_cs.c~pcmcia-net-use-roundup_pow_of_two-macro-instead-of-grotesque-loop drivers/net/pcmcia/pcnet_cs.c --- a/drivers/net/pcmcia/pcnet_cs.c~pcmcia-net-use-roundup_pow_of_two-macro-instead-of-grotesque-loop +++ a/drivers/net/pcmcia/pcnet_cs.c @@ -38,6 +38,7 @@ #include linux/delay.h #include linux/ethtool.h #include linux/netdevice.h +#include linux/log2.h #include ../8390.h #include pcmcia/cs_types.h @@ -1484,8 +1485,7 @@ static int setup_shmem_window(struct pcm window_size = 32 * 1024; /* Make sure it's a power of two. */ -while ((window_size (window_size - 1)) != 0) - window_size += window_size ~(window_size - 1); +window_size = roundup_pow_of_two(window_size); /* Allocate a memory window */ req.Attributes = WIN_DATA_WIDTH_16|WIN_MEMORY_TYPE_CM|WIN_ENABLE; _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 8/8] forcedeth boot delay fix
From: Ayaz Abdulla [EMAIL PROTECTED] Fix a long boot delay in the forcedeth driver. During initialization, the timeout for the handshake between mgmt unit and driver can be very long. The patch reduces the timeout by eliminating an extra loop around the timeout logic. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=9308 Signed-off-by: Ayaz Abdulla [EMAIL PROTECTED] Cc: Alex Howells [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- drivers/net/forcedeth.c | 22 +- 1 file changed, 9 insertions(+), 13 deletions(-) diff -puN drivers/net/forcedeth.c~forcedeth-boot-delay-fix drivers/net/forcedeth.c --- a/drivers/net/forcedeth.c~forcedeth-boot-delay-fix +++ a/drivers/net/forcedeth.c @@ -5294,19 +5294,15 @@ static int __devinit nv_probe(struct pci if (readl(base + NvRegTransmitterControl) NVREG_XMITCTL_SYNC_PHY_INIT) { np-mac_in_use = readl(base + NvRegTransmitterControl) NVREG_XMITCTL_MGMT_ST; dprintk(KERN_INFO %s: mgmt unit is running. mac in use %x.\n, pci_name(pci_dev), np-mac_in_use); - for (i = 0; i 5000; i++) { - msleep(1); - if (nv_mgmt_acquire_sema(dev)) { - /* management unit setup the phy already? */ - if ((readl(base + NvRegTransmitterControl) NVREG_XMITCTL_SYNC_MASK) == - NVREG_XMITCTL_SYNC_PHY_INIT) { - /* phy is inited by mgmt unit */ - phyinitialized = 1; - dprintk(KERN_INFO %s: Phy already initialized by mgmt unit.\n, pci_name(pci_dev)); - } else { - /* we need to init the phy */ - } - break; + if (nv_mgmt_acquire_sema(dev)) { + /* management unit setup the phy already? 
*/ + if ((readl(base + NvRegTransmitterControl) NVREG_XMITCTL_SYNC_MASK) == + NVREG_XMITCTL_SYNC_PHY_INIT) { + /* phy is inited by mgmt unit */ + phyinitialized = 1; + dprintk(KERN_INFO %s: Phy already initialized by mgmt unit.\n, pci_name(pci_dev)); + } else { + /* we need to init the phy */ } } } _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 3/8] ucc_geth-fix-build-break-introduced-by-commit-09f75cd7bf13720738e6a196cc0107ce9a5bd5a0-checkpatch-fixes
From: Andrew Morton [EMAIL PROTECTED] Cc: David S. Miller [EMAIL PROTECTED] Cc: Emil Medve [EMAIL PROTECTED] Cc: Jeff Garzik [EMAIL PROTECTED] Cc: Kumar Gala [EMAIL PROTECTED] Cc: Li Yang [EMAIL PROTECTED] Cc: Paul Mackerras [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- drivers/net/ucc_geth.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff -puN drivers/net/ucc_geth.c~ucc_geth-fix-build-break-introduced-by-commit-09f75cd7bf13720738e6a196cc0107ce9a5bd5a0-checkpatch-fixes drivers/net/ucc_geth.c --- a/drivers/net/ucc_geth.c~ucc_geth-fix-build-break-introduced-by-commit-09f75cd7bf13720738e6a196cc0107ce9a5bd5a0-checkpatch-fixes +++ a/drivers/net/ucc_geth.c @@ -3443,7 +3443,7 @@ static int ucc_geth_rx(struct ucc_geth_p u16 length, howmany = 0; u32 bd_status; u8 *bdBuffer; - struct net_device * dev; + struct net_device *dev; ugeth_vdbg(%s: IN, __FUNCTION__); _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 4/8] drivers/net/chelsio/: #if 0 unused functions
From: Adrian Bunk [EMAIL PROTECTED] This patch #if 0's the following unused functions: - espi.c:t1_espi_set_misc_ctrl() - sge.c:t1_sched_set_max_avail_bytes() - sge.c:t1_sched_set_drain_bits_per_us() Signed-off-by: Adrian Bunk [EMAIL PROTECTED] Cc: Jeff Garzik [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- drivers/net/chelsio/espi.c |2 ++ drivers/net/chelsio/espi.h |1 - drivers/net/chelsio/sge.c |4 drivers/net/chelsio/sge.h |2 -- 4 files changed, 6 insertions(+), 3 deletions(-) diff -puN drivers/net/chelsio/espi.c~drivers-net-chelsio-if-0-unused-functions drivers/net/chelsio/espi.c --- a/drivers/net/chelsio/espi.c~drivers-net-chelsio-if-0-unused-functions +++ a/drivers/net/chelsio/espi.c @@ -297,6 +297,7 @@ struct peespi *t1_espi_create(adapter_t return espi; } +#if 0 void t1_espi_set_misc_ctrl(adapter_t *adapter, u32 val) { struct peespi *espi = adapter-espi; @@ -309,6 +310,7 @@ void t1_espi_set_misc_ctrl(adapter_t *ad writel(espi-misc_ctrl, adapter-regs + A_ESPI_MISC_CONTROL); spin_unlock(espi-lock); } +#endif /* 0 */ u32 t1_espi_get_mon(adapter_t *adapter, u32 addr, u8 wait) { diff -puN drivers/net/chelsio/espi.h~drivers-net-chelsio-if-0-unused-functions drivers/net/chelsio/espi.h --- a/drivers/net/chelsio/espi.h~drivers-net-chelsio-if-0-unused-functions +++ a/drivers/net/chelsio/espi.h @@ -62,7 +62,6 @@ void t1_espi_intr_disable(struct peespi int t1_espi_intr_handler(struct peespi *); const struct espi_intr_counts *t1_espi_get_intr_counts(struct peespi *espi); -void t1_espi_set_misc_ctrl(adapter_t *adapter, u32 val); u32 t1_espi_get_mon(adapter_t *adapter, u32 addr, u8 wait); int t1_espi_get_mon_t204(adapter_t *, u32 *, u8); diff -puN drivers/net/chelsio/sge.c~drivers-net-chelsio-if-0-unused-functions drivers/net/chelsio/sge.c --- a/drivers/net/chelsio/sge.c~drivers-net-chelsio-if-0-unused-functions +++ a/drivers/net/chelsio/sge.c @@ -330,6 +330,8 @@ unsigned int t1_sched_update_parms(struc return max_avail_segs * (p-mtu - 40); } +#if 0 + 
/* * t1_sched_max_avail_bytes() tells the scheduler the maximum amount of * data that can be pushed per port. @@ -357,6 +359,8 @@ void t1_sched_set_drain_bits_per_us(stru t1_sched_update_parms(sge, port, 0, 0); } +#endif /* 0 */ + /* * get_clock() implements a ns clock (see ktime_get) diff -puN drivers/net/chelsio/sge.h~drivers-net-chelsio-if-0-unused-functions drivers/net/chelsio/sge.h --- a/drivers/net/chelsio/sge.h~drivers-net-chelsio-if-0-unused-functions +++ a/drivers/net/chelsio/sge.h @@ -89,8 +89,6 @@ void t1_sge_intr_disable(struct sge *); void t1_sge_intr_clear(struct sge *); const struct sge_intr_counts *t1_sge_get_intr_counts(const struct sge *sge); void t1_sge_get_port_stats(const struct sge *sge, int port, struct sge_port_stats *); -void t1_sched_set_max_avail_bytes(struct sge *, unsigned int); -void t1_sched_set_drain_bits_per_us(struct sge *, unsigned int, unsigned int); unsigned int t1_sched_update_parms(struct sge *, unsigned int, unsigned int, unsigned int); _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 2/8] forcedeth: fix MAC address detection on network card (regression in 2.6.23)
From: Michael Pyne [EMAIL PROTECTED] Partially revert a change to mac address detection introduced to the forcedeth driver. The change was intended to correct mac address detection for newer nVidia chipsets where the mac address was stored in reverse order. One of those chipsets appears to still have the mac address in reverse order (or at least, it does on my system). The change that broke mac address detection for my card was commit ef756b3e56c68a4d76d9d7b9a73fa8f4f739180f forcedeth: mac address correct My network card is an nVidia built-in Ethernet card, output from lspci as follows (with text and numeric ids): $ lspci | grep Ethernet 00:07.0 Bridge: nVidia Corporation MCP61 Ethernet (rev a2) $ lspci -n | grep 07.0 00:07.0 0680: 10de:03ef (rev a2) The vendor id is, of course, nVidia. The device id corresponds to the NVIDIA_NVENET_19 entry. The included patch fixes the MAC address detection on my system. Interestingly, the MAC address appears to be in the range reserved for my motherboard manufacturer (Gigabyte) and not nVidia. Signed-off-by: Michael J. 
Pyne [EMAIL PROTECTED] Cc: Jeff Garzik [EMAIL PROTECTED] Cc: Ayaz Abdulla [EMAIL PROTECTED] Cc: [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- drivers/net/forcedeth.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff -puN drivers/net/forcedeth.c~forcedeth-fix-mac-address-detection-on-network-card-regression-in-2623 drivers/net/forcedeth.c --- a/drivers/net/forcedeth.c~forcedeth-fix-mac-address-detection-on-network-card-regression-in-2623 +++ a/drivers/net/forcedeth.c @@ -,7 +,7 @@ static struct pci_device_id pci_tbl[] = }, { /* MCP61 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_19), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTRL|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, }, { /* MCP65 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_20), _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 7/8] Net: ibm_newemac, remove SPIN_LOCK_UNLOCKED
From: Jiri Slaby [EMAIL PROTECTED] SPIN_LOCK_UNLOCKED is deprecated, use DEFINE_SPINLOCK instead Signed-off-by: Jiri Slaby [EMAIL PROTECTED] Cc: Jeff Garzik [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- drivers/net/ibm_newemac/debug.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff -puN drivers/net/ibm_newemac/debug.c~net-ibm_newemac-remove-spin_lock_unlocked drivers/net/ibm_newemac/debug.c --- a/drivers/net/ibm_newemac/debug.c~net-ibm_newemac-remove-spin_lock_unlocked +++ a/drivers/net/ibm_newemac/debug.c @@ -21,7 +21,7 @@ #include core.h -static spinlock_t emac_dbg_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(emac_dbg_lock); static void emac_desc_dump(struct emac_instance *p) { _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 6/8] forcedeth: new mcp79 device ids
From: Ayaz Abdulla [EMAIL PROTECTED] Add new device ids and features for mcp79 devices into the forcedeth driver. Signed-off-by: Ayaz Abdulla [EMAIL PROTECTED] Cc: Jeff Garzik [EMAIL PROTECTED] Cc: Manfred Spraul [EMAIL PROTECTED] Cc: [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- drivers/net/forcedeth.c | 16 include/linux/pci_ids.h |4 2 files changed, 20 insertions(+) diff -puN drivers/net/forcedeth.c~forcedeth-new-mcp79-device-ids drivers/net/forcedeth.c --- a/drivers/net/forcedeth.c~forcedeth-new-mcp79-device-ids +++ a/drivers/net/forcedeth.c @@ -5621,6 +5621,22 @@ static struct pci_device_id pci_tbl[] = PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_35), .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, }, + { /* MCP79 Ethernet Controller */ + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_36), + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, + }, + { /* MCP79 Ethernet Controller */ + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_37), + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, + }, + { /* MCP79 Ethernet Controller */ + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_38), + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, + }, + { /* MCP79 Ethernet Controller */ + PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_39), + .driver_data = 
DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_MSI|DEV_HAS_POWER_CNTRL|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_EXTENDED|DEV_HAS_MGMT_UNIT, + }, {0,}, }; diff -puN include/linux/pci_ids.h~forcedeth-new-mcp79-device-ids include/linux/pci_ids.h --- a/include/linux/pci_ids.h~forcedeth-new-mcp79-device-ids +++ a/include/linux/pci_ids.h @@ -1237,6 +1237,10 @@ #define PCI_DEVICE_ID_NVIDIA_NVENET_33 0x0761 #define PCI_DEVICE_ID_NVIDIA_NVENET_34 0x0762 #define PCI_DEVICE_ID_NVIDIA_NVENET_35 0x0763 +#define PCI_DEVICE_ID_NVIDIA_NVENET_36 0x0AB0 +#define PCI_DEVICE_ID_NVIDIA_NVENET_37 0x0AB1 +#define PCI_DEVICE_ID_NVIDIA_NVENET_38 0x0AB2 +#define PCI_DEVICE_ID_NVIDIA_NVENET_39 0x0AB3 #define PCI_VENDOR_ID_IMS 0x10e0 #define PCI_DEVICE_ID_IMS_TT1280x9128 _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 2/5] bluetooth: uninlining
From: Andrew Morton [EMAIL PROTECTED] Remove all those inlines which were either a) unneeded or b) increased code size. text data bss dec hex filename before: 6997 74 8 7079 1ba7 net/bluetooth/hidp/core.o after: 6492 74 8 6574 19ae net/bluetooth/hidp/core.o Cc: Marcel Holtmann [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- net/bluetooth/hidp/core.c | 30 +++--- 1 file changed, 19 insertions(+), 11 deletions(-) diff -puN net/bluetooth/hidp/core.c~bluetooth-uninlining net/bluetooth/hidp/core.c --- a/net/bluetooth/hidp/core.c~bluetooth-uninlining +++ a/net/bluetooth/hidp/core.c @@ -135,8 +135,8 @@ static void __hidp_copy_session(struct h } } -static inline int hidp_queue_event(struct hidp_session *session, struct input_dev *dev, - unsigned int type, unsigned int code, int value) +static int hidp_queue_event(struct hidp_session *session, struct input_dev *dev, + unsigned int type, unsigned int code, int value) { unsigned char newleds; struct sk_buff *skb; @@ -243,7 +243,8 @@ static void hidp_input_report(struct hid input_sync(dev); } -static inline int hidp_queue_report(struct hidp_session *session, unsigned char *data, int size) +static int hidp_queue_report(struct hidp_session *session, + unsigned char *data, int size) { struct sk_buff *skb; @@ -287,7 +288,7 @@ static void hidp_idle_timeout(unsigned l hidp_schedule(session); } -static inline void hidp_set_timer(struct hidp_session *session) +static void hidp_set_timer(struct hidp_session *session) { if (session-idle_to 0) mod_timer(session-timer, jiffies + HZ * session-idle_to); @@ -332,7 +333,8 @@ static inline int hidp_send_ctrl_message return err; } -static inline void hidp_process_handshake(struct hidp_session *session, unsigned char param) +static void hidp_process_handshake(struct hidp_session *session, + unsigned char param) { BT_DBG(session %p param 0x%02x, session, param); @@ -365,7 +367,8 @@ static inline void hidp_process_handshak } } -static inline void hidp_process_hid_control(struct 
hidp_session *session, unsigned char param) +static void hidp_process_hid_control(struct hidp_session *session, + unsigned char param) { BT_DBG(session %p param 0x%02x, session, param); @@ -379,7 +382,8 @@ static inline void hidp_process_hid_cont } } -static inline void hidp_process_data(struct hidp_session *session, struct sk_buff *skb, unsigned char param) +static void hidp_process_data(struct hidp_session *session, struct sk_buff *skb, + unsigned char param) { BT_DBG(session %p skb %p len %d param 0x%02x, session, skb, skb-len, param); @@ -406,7 +410,8 @@ static inline void hidp_process_data(str } } -static inline void hidp_recv_ctrl_frame(struct hidp_session *session, struct sk_buff *skb) +static void hidp_recv_ctrl_frame(struct hidp_session *session, + struct sk_buff *skb) { unsigned char hdr, type, param; @@ -440,7 +445,8 @@ static inline void hidp_recv_ctrl_frame( kfree_skb(skb); } -static inline void hidp_recv_intr_frame(struct hidp_session *session, struct sk_buff *skb) +static void hidp_recv_intr_frame(struct hidp_session *session, + struct sk_buff *skb) { unsigned char hdr; @@ -608,7 +614,8 @@ static struct device *hidp_get_device(st return conn ? conn-dev : NULL; } -static inline int hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req) +static int hidp_setup_input(struct hidp_session *session, + struct hidp_connadd_req *req) { struct input_dev *input = session-input; int i; @@ -685,7 +692,8 @@ static void hidp_setup_quirks(struct hid hid-quirks = hidp_blacklist[n].quirks; } -static inline void hidp_setup_hid(struct hidp_session *session, struct hidp_connadd_req *req) +static void hidp_setup_hid(struct hidp_session *session, + struct hidp_connadd_req *req) { struct hid_device *hid = session-hid; struct hid_report *report; _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 1/5] bluetooth: hidp_process_hid_control remove unnecessary parameter dealing
From: Dave Young [EMAIL PROTECTED] According to the bluetooth HID spec v1.0 chapter 7.4.2 This code requests a major state change in a BT-HID device. A HID_CONTROL request does not generate a HANDSHAKE response. A HID_CONTROL packet with a parameter of VIRTUAL_CABLE_UNPLUG is the only HID_CONTROL packet a device can send to a host. A host will ignore all other packets. So in the hidp_process_hid_control function, we just need to deal with the UNPLUG packet. Signed-off-by: Dave Young [EMAIL PROTECTED] Cc: Marcel Holtmann [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- net/bluetooth/hidp/core.c | 19 +-- 1 file changed, 1 insertion(+), 18 deletions(-) diff -puN net/bluetooth/hidp/core.c~bluetooth-hidp_process_hid_control-remove-unnecessary-parameter-dealing net/bluetooth/hidp/core.c --- a/net/bluetooth/hidp/core.c~bluetooth-hidp_process_hid_control-remove-unnecessary-parameter-dealing +++ a/net/bluetooth/hidp/core.c @@ -369,30 +369,13 @@ static inline void hidp_process_hid_cont { BT_DBG(session %p param 0x%02x, session, param); - switch (param) { - case HIDP_CTRL_NOP: - break; - - case HIDP_CTRL_VIRTUAL_CABLE_UNPLUG: + if (param == HIDP_CTRL_VIRTUAL_CABLE_UNPLUG) { /* Flush the transmit queues */ skb_queue_purge(session-ctrl_transmit); skb_queue_purge(session-intr_transmit); /* Kill session thread */ atomic_inc(session-terminate); - break; - - case HIDP_CTRL_HARD_RESET: - case HIDP_CTRL_SOFT_RESET: - case HIDP_CTRL_SUSPEND: - case HIDP_CTRL_EXIT_SUSPEND: - /* FIXME: We have to parse these and return no error */ - break; - - default: - __hidp_send_ctrl_message(session, - HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0); - break; } } _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 5/5] bluetooth: blacklist another Broadcom BCM2035 device
From: Andy Shevchenko [EMAIL PROTECTED] This device is recognized as bluetooth, but still does not work. Signed-off-by: Andy Shevchenko [EMAIL PROTECTED] Cc: Marcel Holtmann [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- drivers/bluetooth/hci_usb.c |1 + 1 file changed, 1 insertion(+) diff -puN drivers/bluetooth/hci_usb.c~bluetooth-blacklist-another-broadcom-bcm2035-device drivers/bluetooth/hci_usb.c --- a/drivers/bluetooth/hci_usb.c~bluetooth-blacklist-another-broadcom-bcm2035-device +++ a/drivers/bluetooth/hci_usb.c @@ -111,6 +111,7 @@ static struct usb_device_id blacklist_id { USB_DEVICE(0x0a5c, 0x2033), .driver_info = HCI_IGNORE }, /* Broadcom BCM2035 */ + { USB_DEVICE(0x0a5c, 0x2035), .driver_info = HCI_RESET | HCI_WRONG_SCO_MTU }, { USB_DEVICE(0x0a5c, 0x200a), .driver_info = HCI_RESET | HCI_WRONG_SCO_MTU }, { USB_DEVICE(0x0a5c, 0x2009), .driver_info = HCI_BCM92035 }, _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 4/5] drivers/bluetooth/btsdio.c: fix double-free
From: Adrian Bunk [EMAIL PROTECTED] This patch fixes a double-free spotted by the Coverity checker. Signed-off-by: Adrian Bunk [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- drivers/bluetooth/btsdio.c |4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff -puN drivers/bluetooth/btsdio.c~drivers-bluetooth-btsdioc-fix-double-free drivers/bluetooth/btsdio.c --- a/drivers/bluetooth/btsdio.c~drivers-bluetooth-btsdioc-fix-double-free +++ a/drivers/bluetooth/btsdio.c @@ -162,10 +162,8 @@ static int btsdio_rx_packet(struct btsdi bt_cb(skb)-pkt_type = hdr[3]; err = hci_recv_frame(skb); - if (err 0) { - kfree(skb); + if (err 0) return err; - } sdio_writeb(data-func, 0x00, REG_PC_RRT, NULL); _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 3/5] drivers/bluetooth/bpa10x.c: fix memleak
From: Adrian Bunk [EMAIL PROTECTED] This patch fixes a memleak spotted by the Coverity checker. Signed-off-by: Adrian Bunk [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- drivers/bluetooth/bpa10x.c |1 + 1 file changed, 1 insertion(+) diff -puN drivers/bluetooth/bpa10x.c~drivers-bluetooth-bpa10xc-fix-memleak drivers/bluetooth/bpa10x.c --- a/drivers/bluetooth/bpa10x.c~drivers-bluetooth-bpa10xc-fix-memleak +++ a/drivers/bluetooth/bpa10x.c @@ -423,6 +423,7 @@ static int bpa10x_send_frame(struct sk_b break; default: + usb_free_urb(urb); return -EILSEQ; } _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] napi: conditional NAPI in drivers
There has been a pattern of bugs in the 2.6.24 conversion where a driver is broken if NAPI is not configured. Change all drivers that have conditional NAPI option to have the datastructure missing so these bugs are caught at compile time. Compile tested only (but that's the point). Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- drivers/net/amd8111e.h |2 ++ drivers/net/cassini.h|3 ++- drivers/net/chelsio/common.h |2 ++ drivers/net/forcedeth.c |3 ++- drivers/net/gianfar.h|2 ++ drivers/net/ixgb/ixgb.h |2 ++ drivers/net/pcnet32.c|2 ++ drivers/net/s2io.h |2 ++ drivers/net/starfire.c |2 ++ drivers/net/ucc_geth.h |2 ++ drivers/net/via-rhine.c |2 ++ 11 files changed, 22 insertions(+), 2 deletions(-) --- a/drivers/net/amd8111e.h2007-11-16 16:17:20.0 -0800 +++ b/drivers/net/amd8111e.h2007-11-21 10:04:20.0 -0800 @@ -763,7 +763,9 @@ struct amd8111e_priv{ /* Reg memory mapped address */ void __iomem *mmio; +#ifdef CONFIG_AMD8111E_NAPI struct napi_struct napi; +#endif spinlock_t lock;/* Guard lock */ unsigned long rx_idx, tx_idx; /* The next free ring entry */ --- a/drivers/net/cassini.h 2007-11-16 16:17:20.0 -0800 +++ b/drivers/net/cassini.h 2007-11-21 10:07:25.0 -0800 @@ -4280,8 +4280,9 @@ struct cas { int rx_cur[N_RX_COMP_RINGS], rx_new[N_RX_COMP_RINGS]; int rx_last[N_RX_DESC_RINGS]; +#ifdef CONFIG_CASSINI_NAPI struct napi_struct napi; - +#endif /* Set when chip is actually in operational state * (ie. 
not power managed) */ int hw_running; --- a/drivers/net/chelsio/common.h 2007-11-16 16:17:20.0 -0800 +++ b/drivers/net/chelsio/common.h 2007-11-21 10:15:09.0 -0800 @@ -278,7 +278,9 @@ struct adapter { struct peespi *espi; struct petp *tp; +#ifdef CONFIG_CHELSIO_T1_NAPI struct napi_struct napi; +#endif struct port_info port[MAX_NPORTS]; struct delayed_work stats_update_task; struct timer_list stats_update_timer; --- a/drivers/net/forcedeth.c 2007-11-19 18:56:12.0 -0800 +++ b/drivers/net/forcedeth.c 2007-11-21 10:10:43.0 -0800 @@ -748,8 +748,9 @@ struct fe_priv { spinlock_t lock; struct net_device *dev; +#ifdef CONFIG_FORCEDETH_NAPI struct napi_struct napi; - +#endif /* General data: * Locking: spin_lock(np-lock); */ struct nv_ethtool_stats estats; --- a/drivers/net/gianfar.h 2007-11-16 16:17:20.0 -0800 +++ b/drivers/net/gianfar.h 2007-11-21 10:13:25.0 -0800 @@ -691,7 +691,9 @@ struct gfar_private { spinlock_t rxlock; struct net_device *dev; +#ifdef CONFIG_GFAR_NAPI struct napi_struct napi; +#endif /* skb array and index */ struct sk_buff ** rx_skbuff; --- a/drivers/net/ixgb/ixgb.h 2007-11-19 18:56:12.0 -0800 +++ b/drivers/net/ixgb/ixgb.h 2007-11-21 10:15:41.0 -0800 @@ -184,7 +184,9 @@ struct ixgb_adapter { boolean_t rx_csum; /* OS defined structs */ +#ifdef CONFIG_IXGB_NAPI struct napi_struct napi; +#endif struct net_device *netdev; struct pci_dev *pdev; struct net_device_stats net_stats; --- a/drivers/net/starfire.c2007-11-16 16:17:21.0 -0800 +++ b/drivers/net/starfire.c2007-11-21 10:10:13.0 -0800 @@ -596,7 +596,9 @@ struct netdev_private { struct tx_done_desc *tx_done_q; dma_addr_t tx_done_q_dma; unsigned int tx_done; +#ifdef CONFIG_ADAPTEC_STARFIRE_NAPI struct napi_struct napi; +#endif struct net_device *dev; struct net_device_stats stats; struct pci_dev *pci_dev; --- a/drivers/net/ucc_geth.h2007-11-16 16:17:21.0 -0800 +++ b/drivers/net/ucc_geth.h2007-11-21 10:14:04.0 -0800 @@ -1185,7 +1185,9 @@ struct ucc_geth_private { struct ucc_geth_info *ug_info; struct 
ucc_fast_private *uccf; struct net_device *dev; +#ifdef CONFIG_UGETH_NAPI struct napi_struct napi; +#endif struct ucc_geth *ug_regs; struct ucc_geth_init_pram *p_init_enet_param_shadow; struct ucc_geth_exf_global_pram *p_exf_glbl_param; --- a/drivers/net/via-rhine.c 2007-11-16 16:17:21.0 -0800 +++ b/drivers/net/via-rhine.c 2007-11-21 10:11:14.0 -0800 @@ -390,7 +390,9 @@ struct rhine_private { struct pci_dev *pdev; long pioaddr; struct net_device *dev; +#ifdef CONFIG_VIA_RHINE_NAPI struct napi_struct napi; +#endif struct net_device_stats stats; spinlock_t lock; --- a/drivers/net/tulip/tulip.h 2007-11-16 16:17:21.0 -0800 +++ b/drivers/net/tulip/tulip.h 2007-11-21 10:23:38.0 -0800 @@ -353,7 +353,9 @@ struct tulip_private { int chip_id; int
[patch 1/8] forcedeth: power down phy when interface is down
From: Ed Swierk [EMAIL PROTECTED] Bring the physical link down when the interface is down by placing the PHY in power-down state, unless WOL is enabled. This mirrors the behavior of other drivers including e1000 and tg3. Signed-off-by: Ed Swierk [EMAIL PROTECTED] Cc: Jeff Garzik [EMAIL PROTECTED] Cc: Ayaz Abdulla [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- drivers/net/forcedeth.c | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff -puN drivers/net/forcedeth.c~forcedeth-power-down-phy-when-interface-is-down drivers/net/forcedeth.c --- a/drivers/net/forcedeth.c~forcedeth-power-down-phy-when-interface-is-down +++ a/drivers/net/forcedeth.c @@ -1312,9 +1312,9 @@ static int phy_init(struct net_device *d /* some phys clear out pause advertisment on reset, set it back */ mii_rw(dev, np-phyaddr, MII_ADVERTISE, reg); - /* restart auto negotiation */ + /* restart auto negotiation, power down phy */ mii_control = mii_rw(dev, np-phyaddr, MII_BMCR, MII_READ); - mii_control |= (BMCR_ANRESTART | BMCR_ANENABLE); + mii_control |= (BMCR_ANRESTART | BMCR_ANENABLE | BMCR_PDOWN); if (mii_rw(dev, np-phyaddr, MII_BMCR, mii_control)) { return PHY_ERROR; } @@ -4798,6 +4798,10 @@ static int nv_open(struct net_device *de dprintk(KERN_DEBUG nv_open: begin\n); + /* power up phy */ + mii_rw(dev, np-phyaddr, MII_BMCR, + mii_rw(dev, np-phyaddr, MII_BMCR, MII_READ) ~BMCR_PDOWN); + /* erase previous misconfiguration */ if (np-driver_data DEV_HAS_POWER_CNTRL) nv_mac_reset(dev); @@ -4980,6 +4984,10 @@ static int nv_close(struct net_device *d if (np-wolenabled) { writel(NVREG_PFF_ALWAYS|NVREG_PFF_MYADDR, base + NvRegPacketFilterFlags); nv_start_rx(dev); + } else { + /* power down phy */ + mii_rw(dev, np-phyaddr, MII_BMCR, + mii_rw(dev, np-phyaddr, MII_BMCR, MII_READ)|BMCR_PDOWN); } /* FIXME: power down nic */ _ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at 
http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/5] udp: memory limitation by using udp_mem
David Miller wrote: From: Hideo AOKI [EMAIL PROTECTED] Date: Thu, 15 Nov 2007 16:50:14 -0500 +static inline int __ip_check_max_skb_pages(struct sock *sk, int size) +{ +switch(sk-sk_protocol) { +case IPPROTO_UDP: +if (atomic_read(sk-sk_prot-memory_allocated) + size + sk-sk_prot-sysctl_mem[0]) +return -ENOBUFS; +/* Fall through */ +default: +break; +} +return 0; +} + snip These special case checks are all over the place. We don't have tests all over the place to see if a socket is TCP or DCCP or SCTP in order to implement memory accounting there, because we did it for connection oriented protocols cleanly, separating things via callbacks etc. I would like to see the datagram memory accounting work similarly. Hello, I'm still thinking about this and focusing on enhancement of above function. However, I feel difficulty because socket buffer allocation of UDP sending packet is in IP layer: ip_append_data(). Moreover, the function is called from several protocols including TCP. This makes setting callback hard without changing function interface or core data structure. Then, I would like to know if the following implementation could be acceptable. - Adding sk_datagram_{rw}mem_schedule() as a memory schedule function for datagram protocols. - Adding sk_wmem_schedule(). In the function, sk_stream_wmem_schedule() is called if the caller socket is stream protocols. Moreover, sk_datagram_wmem_schedule() is called if the socket is datagram like this: int sk_wmem_schedule(struct sock *sk, int size) { ... switch (sk-sk_type) { case SOCK_STREAM: return sk_stream_wmem_schedule(sk, size); case SOCK_DGRAM: return sk_datagram_wmem_schedule(sk, size); default: return 1; } } - In ip_append_data(), sk_wmem_schedule() is called to execute memory accounting. Please let me know if you have any comments about this. Best regards, Hideo -- Hitachi Computer Products (America) Inc. 
- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [patch 2/8] forcedeth: fix MAC address detection on network card (regression in 2.6.23)
The solution is to get the OEM to update their BIOS (instead of integrating this patch) since the MCP61 specs indicate that the MAC Address should be in correct order from BIOS. By changing the feature DEV_HAS_CORRECT_MACADDR to all MCP61 boards, it could cause it to break on other OEM systems who have implemented it correctly. Thanks, Ayaz -Original Message- From: [EMAIL PROTECTED] [mailto:[EMAIL PROTECTED] Sent: Wednesday, November 21, 2007 3:03 PM To: [EMAIL PROTECTED] Cc: netdev@vger.kernel.org; [EMAIL PROTECTED]; [EMAIL PROTECTED]; Ayaz Abdulla; [EMAIL PROTECTED] Subject: [patch 2/8] forcedeth: fix MAC address detection on network card (regression in 2.6.23) From: Michael Pyne [EMAIL PROTECTED] Partially revert a change to mac address detection introduced to the forcedeth driver. The change was intended to correct mac address detection for newer nVidia chipsets where the mac address was stored in reverse order. One of those chipsets appears to still have the mac address in reverse order (or at least, it does on my system). The change that broke mac address detection for my card was commit ef756b3e56c68a4d76d9d7b9a73fa8f4f739180f forcedeth: mac address correct My network card is an nVidia built-in Ethernet card, output from lspci as follows (with text and numeric ids): $ lspci | grep Ethernet 00:07.0 Bridge: nVidia Corporation MCP61 Ethernet (rev a2) $ lspci -n | grep 07.0 00:07.0 0680: 10de:03ef (rev a2) The vendor id is, of course, nVidia. The device id corresponds to the NVIDIA_NVENET_19 entry. The included patch fixes the MAC address detection on my system. Interestingly, the MAC address appears to be in the range reserved for my motherboard manufacturer (Gigabyte) and not nVidia. Signed-off-by: Michael J. 
Pyne [EMAIL PROTECTED] Cc: Jeff Garzik [EMAIL PROTECTED] Cc: Ayaz Abdulla [EMAIL PROTECTED] Cc: [EMAIL PROTECTED] Signed-off-by: Andrew Morton [EMAIL PROTECTED] --- drivers/net/forcedeth.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff -puN drivers/net/forcedeth.c~forcedeth-fix-mac-address-detection-on-network-c ard-regression-in-2623 drivers/net/forcedeth.c --- a/drivers/net/forcedeth.c~forcedeth-fix-mac-address-detection-on-network -card-regression-in-2623 +++ a/drivers/net/forcedeth.c @@ -,7 +,7 @@ static struct pci_device_id pci_tbl[] = }, { /* MCP61 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_19), - .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTR L|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_E XTENDED|DEV_HAS_MGMT_UNIT|DEV_HAS_CORRECT_MACADDR, + .driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_HIGH_DMA|DEV_HAS_POWER_CNTR L|DEV_HAS_MSI|DEV_HAS_PAUSEFRAME_TX|DEV_HAS_STATISTICS_V2|DEV_HAS_TEST_E XTENDED|DEV_HAS_MGMT_UNIT, }, { /* MCP65 Ethernet Controller */ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_20), _ --- This email message is for the sole use of the intended recipient(s) and may contain confidential information. Any unauthorized review, use, disclosure or distribution is prohibited. If you are not the intended recipient, please contact the sender by reply email and destroy all copies of the original message. --- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/8] forcedeth: fix MAC address detection on network card (regression in 2.6.23)
On Wed, 21 Nov 2007 15:34:52 -0800 Ayaz Abdulla [EMAIL PROTECTED] wrote: The solution is to get the OEM to update their BIOS (instead of integrating this patch) since the MCP61 specs indicate that the MAC Address should be in correct order from BIOS. By changing the feature DEV_HAS_CORRECT_MACADDR to all MCP61 boards, it could cause it to break on other OEM systems who have implemented it correctly. Getting an OEM to fix their BIOS isn't always a simple thing... Perhaps Michael's change should be enabled by a module parameter for those who happen to have the busted BIOS? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/8] forcedeth: fix MAC address detection on network card (regression in 2.6.23)
On Wednesday 21 November 2007, Andrew Morton wrote: On Wed, 21 Nov 2007 15:34:52 -0800 Ayaz Abdulla [EMAIL PROTECTED] wrote: The solution is to get the OEM to update their BIOS (instead of integrating this patch) since the MCP61 specs indicate that the MAC Address should be in correct order from BIOS. By changing the feature DEV_HAS_CORRECT_MACADDR to all MCP61 boards, it could cause it to break on other OEM systems who have implemented it correctly. Getting an OEM to fix their BIOS isn't always a simple thing... Perhaps Michael's change should be enabled by a module parameter for those who happen to have the busted BIOS? I have contacted the motherboard vendor about this a couple of weeks ago per Ayaz's request and have received no response. I've also upgraded to the latest firmware for this motherboard and the bug remains. I think it would be ideal if there were a way to detect broken MCP61's (i.e. those with a Gigabyte MAC ID instead of the nVidia one) and only reverse the MAC address then. A module parameter would also work but then I'd need to remember to apply it. :) Regards, - Michael Pyne signature.asc Description: This is a digitally signed message part.
Re: [RFC/PATCH] SO_NO_CHECK for IPv6
YOSHIFUJI Hideaki / 吉藤英明 wrote: In article [EMAIL PROTECTED] (at Wed, 21 Nov 2007 07:45:32 -0500), Jeff Garzik [EMAIL PROTECTED] says: SO_NO_CHECK support for IPv6 appeared to be missing. This is presented, based on a reading of net/ipv4/udp.c. Disagree. UDP checksum is mandatory in IPv6. Ah, you mean that I need to turn off UDP checksum on receive end as well in IPv6... true. For those interested, I am dealing with a UDP app that already does very strong checksumming and encryption, so additional software checksumming at the lower layers is quite simply a waste of CPU cycles. Hardware checksumming is fine, as long as it's free. Jeff - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/8] forcedeth: fix MAC address detection on network card (regression in 2.6.23)
On 22/11/2007, Michael Pyne [EMAIL PROTECTED] wrote: On Wednesday 21 November 2007, Andrew Morton wrote: On Wed, 21 Nov 2007 15:34:52 -0800 Ayaz Abdulla [EMAIL PROTECTED] wrote: The solution is to get the OEM to update their BIOS (instead of integrating this patch) since the MCP61 specs indicate that the MAC Address should be in correct order from BIOS. By changing the feature DEV_HAS_CORRECT_MACADDR to all MCP61 boards, it could cause it to break on other OEM systems who have implemented it correctly. Getting an OEM to fix their BIOS isn't always a simple thing... Perhaps Michael's change should be enabled by a module parameter for those who happen to have the busted BIOS? I have contacted the motherboard vendor about this a couple of weeks ago per Ayaz's request and have received no response. I've also upgraded to the latest firmware for this motherboard and the bug remains. I think it would be ideal if there were a way to detect broken MCP61's (i.e. those with a Gigabyte MAC ID instead of the nVidia one) and only reverse the MAC address then. A module parameter would also work but then I'd need to remember to apply it. :) Hmm, MAC address makeups are not my strong point, but are there no rules describing the various parts of the address that could perhaps be used to infer programmatically if the address seems to be reversed or not, and then use that detection logic for all boards that are known to potentially have the issue? A module parameter that overrules the automatic detection (for when it gets it wrong) would probably also be a good idea. -- Jesper Juhl [EMAIL PROTECTED] Don't top-post http://www.catb.org/~esr/jargon/html/T/top-post.html Plain text mails only, please http://www.expita.com/nomime.html - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: HTB/HSFC shaping precision
Hi jamal and denys, One message later, thats what i dreamed about :-) Subject: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module On website they have very good explanation... http://www.gridmpi.org/gridtcp.jsp That looks interesting - without reading the papers a few questions are developing in my brain cells; for example it looks very similar to what the chelsio NICs claim to do (which could be a good thing for TCP). Whenever i see someone implementing something in hardware, i always get flushes of patents. Thanks for looking at our web page. PSPacer has quite accurate shaping precision. The point is that special hardware like the chelsio NICs is not required for it. PSPacer uses a gap packet, whose format is that of an IEEE 802.3x pause frame, to control the interval between outgoing packets. As far as I know, it is a unique approach. Best Regards, Ryousei Takano - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH] SO_NO_CHECK for IPv6
On Wed, Nov 21, 2007 at 07:17:40PM -0500, Jeff Garzik wrote: For those interested, I am dealing with a UDP app that already does very strong checksumming and encryption, so additional software checksumming at the lower layers is quite simply a waste of CPU cycles. Hardware checksumming is fine, as long as its free. No matter how strong your underlying checksumming is it's not going to protect the IPv6 header is it :) Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC] [1/9] Core module symbol namespaces code and intro.
There seems to be rough consensus that the kernel currently has too many exported symbols. A lot of these exports are generally usable utility functions or important driver interfaces; but another large part are functions intended for use by only one or two very specific modules for a very specific purpose. One example is the TCP code. It has most of its internals exported, but only for use by tcp_ipv6.c (and now a few more by the TCP/IP congestion modules). But it doesn't make sense to include these functions, exported for a specific module, in a broader kernel interface. External modules assume they can use these functions, but they were never intended for that. This patch allows exporting symbols only for specific modules by introducing symbol name spaces. A module name space has a white list of modules that are allowed to import symbols from it; all others can't use the symbols. It adds two new macros: MODULE_NAMESPACE_ALLOW(namespace, module); Allow module to import symbols from namespace. module is the module name without .ko as displayed by lsmod. Must be in the same module as the export (and be duplicated if there are multiple modules exporting symbols to a namespace). Multiple allows for the same name space are allowed. EXPORT_SYMBOL_NS(namespace, symbol); Export symbol into namespace. Only modules allowed for the namespace will be able to use them. EXPORT_SYMBOL_NS implies GPL only because it is only for internal interfaces. The name spaces only work for module loading. I didn't find a nice way to make them work inside the main kernel binary. This means the name space is not enforced for modules that are built in. The biggest amount of work is of course still open: to go over all the existing exports and figure out for which ones it makes sense to define a namespace. I did it for TCP and UDP so far, but the kernel right now has nearly 10k exports (with some dups) that would need to be checked and turned into name spaces.
I would expect any symbol that is only used by one or two other modules is a strong candidate for a namespace; in some cases even more with modules that are tightly coupled. I am optimistic that in the end we will have a much more manageable kernel interface. Caveats: Exports need one long word more memory. I had to add some alignment magic to the existing EXPORT_SYMBOLs to get the sections right. Tested on i386/x86-64, but I hope it also still works on architectures with stricter alignment requirements like ARM. Any testers for that? --- arch/arm/kernel/armksyms.c|2 include/asm-generic/vmlinux.lds.h |7 + include/linux/module.h| 71 +++ kernel/module.c | 137 +++--- 4 files changed, 177 insertions(+), 40 deletions(-) Index: linux/include/linux/module.h === --- linux.orig/include/linux/module.h +++ linux/include/linux/module.h @@ -34,6 +34,7 @@ struct kernel_symbol { unsigned long value; const char *name; + const char *namespace; }; struct modversion_info @@ -167,49 +168,80 @@ struct notifier_block; #ifdef CONFIG_MODULES /* Get/put a kernel symbol (calls must be symmetric) */ -void *__symbol_get(const char *symbol); -void *__symbol_get_gpl(const char *symbol); +extern void *do_symbol_get(const char *symbol, struct module *caller); +#define __symbol_get(sym) do_symbol_get(sym, THIS_MODULE) #define symbol_get(x) ((typeof(x))(__symbol_get(MODULE_SYMBOL_PREFIX #x))) +struct module_ns { + char *name; + char *allowed; +}; + +#define NS_SEPARATOR . + +/* + * Allow module MODULE to reference namespace NS. + * MODULE is just the base module name with suffix or path. + * This must be declared in the module (or main kernel) as where the + * symbols are defined. When multiple modules export symbols from + * a single namespace all modules need to contain a full set + * of MODULE_NAMESPACE_ALLOWs. 
+ */ +#define MODULE_NAMESPACE_ALLOW(ns, module) \ + static const struct module_ns __knamespace_##module##_##_##ns \ + asm(__knamespace_ #module NS_SEPARATOR #ns) \ + __attribute_used__ \ + __attribute__((section(__knamespace), unused))\ + = { #ns, #module } + #ifndef __GENKSYMS__ #ifdef CONFIG_MODVERSIONS /* Mark the CRC weak since genksyms apparently decides not to * generate a checksums for some symbols */ -#define __CRC_SYMBOL(sym, sec) \ +#define __CRC_SYMBOL(sym, sec, post, post2)\ extern void *__crc_##sym __attribute__((weak)); \ - static const unsigned long __kcrctab_##sym \ + static const unsigned long __kcrctab_##sym##post\ + asm(__kcrctab_ #sym post2)\ __attribute_used__ \
[PATCH RFC] [2/9] Fix duplicate symbol check to also check future gpl and unused symbols
This seems to have been forgotten earlier. Right now it was possible for a normal symbol to override a future gpl symbol and similar. I restructured the code a bit to avoid too much duplicated code. --- kernel/module.c | 45 - 1 file changed, 24 insertions(+), 21 deletions(-) Index: linux/kernel/module.c === --- linux.orig/kernel/module.c +++ linux/kernel/module.c @@ -1430,33 +1430,36 @@ EXPORT_SYMBOL_GPL(do_symbol_get); * Ensure that an exported symbol [global namespace] does not already exist * in the kernel or in some other module's exported symbol table. */ -static int verify_export_symbols(struct module *mod) + +static int check_duplicate(const struct kernel_symbol *syms, int num, struct module *owner) { - const char *name = NULL; - unsigned long i, ret = 0; - struct module *owner; + int i; const unsigned long *crc; - for (i = 0; i mod-num_syms; i++) - if (find_symbol(mod-syms[i].name, owner, crc, 1, mod)) { - name = mod-syms[i].name; - ret = -ENOEXEC; - goto dup; - } - - for (i = 0; i mod-num_gpl_syms; i++) - if (find_symbol(mod-gpl_syms[i].name, owner, crc, 1, mod)) { - name = mod-gpl_syms[i].name; - ret = -ENOEXEC; - goto dup; + for (i = 0; i num; i++) + if (find_symbol(syms[i].name, owner, crc, 1, owner)) { + printk(KERN_ERR %s: exports duplicate symbol %s (owned by %s)\n, + owner-name, syms[i].name, module_name(owner)); + return -ENOEXEC; } + return 0; +} -dup: +static int verify_export_symbols(struct module *mod) +{ + int ret = check_duplicate(mod-syms, mod-num_syms, mod); if (ret) - printk(KERN_ERR %s: exports duplicate symbol %s (owned by %s)\n, - mod-name, name, module_name(owner)); - - return ret; + return ret; + ret = check_duplicate(mod-gpl_syms, mod-num_gpl_syms, mod); + if (ret) + return ret; + ret = check_duplicate(mod-unused_syms, mod-num_unused_syms, mod); + if (ret) + return ret; + ret = check_duplicate(mod-unused_gpl_syms, mod-num_unused_gpl_syms, mod); + if (ret) + return ret; + return check_duplicate(mod-gpl_future_syms, 
mod-num_gpl_future_syms, mod); } /* Change all symbols so that sh_value encodes the pointer directly. */ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC] [4/9] modpost: Fix format string warnings
Fix wrong format strings in modpost exposed by the previous patch. Including one missing argument -- some random data was printed instead. --- scripts/mod/modpost.c |7 --- 1 file changed, 4 insertions(+), 3 deletions(-) Index: linux/scripts/mod/modpost.c === --- linux.orig/scripts/mod/modpost.c +++ linux/scripts/mod/modpost.c @@ -388,7 +388,7 @@ static int parse_elf(struct elf_info *in /* Check if file offset is correct */ if (hdr-e_shoff info-size) { - fatal(section header offset=%u in file '%s' is bigger then filesize=%lu\n, hdr-e_shoff, filename, info-size); + fatal(section header offset=%lu in file '%s' is bigger then filesize=%lu\n, (unsigned long)hdr-e_shoff, filename, info-size); return 0; } @@ -409,7 +409,7 @@ static int parse_elf(struct elf_info *in const char *secname; if (sechdrs[i].sh_offset info-size) { - fatal(%s is truncated. sechdrs[i].sh_offset=%u sizeof(*hrd)=%ul\n, filename, (unsigned int)sechdrs[i].sh_offset, sizeof(*hdr)); + fatal(%s is truncated. sechdrs[i].sh_offset=%lu sizeof(*hrd)=%lu\n, filename, (unsigned long)sechdrs[i].sh_offset, sizeof(*hdr)); return 0; } secname = secstrings + sechdrs[i].sh_name; @@ -907,7 +907,8 @@ static void warn_sec_mismatch(const char before '%s' (at offset -0x%llx)\n, modname, fromsec, (unsigned long long)r.r_offset, secname, refsymname, -elf-strtab + after-st_name); +elf-strtab + after-st_name, +(unsigned long long)r.r_offset); } else { warn(%s(%s+0x%llx): Section mismatch: reference to %s:%s\n, modname, fromsec, (unsigned long long)r.r_offset, - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC] [5/9] modpost: Fix a buffer overflow in modpost
When passing a file name longer than 1k the stack could be overflowed. Not really a security issue, but still better plugged. --- scripts/mod/modpost.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) Index: linux/scripts/mod/modpost.c === --- linux.orig/scripts/mod/modpost.c +++ linux/scripts/mod/modpost.c @@ -1656,7 +1656,6 @@ int main(int argc, char **argv) { struct module *mod; struct buffer buf = { }; - char fname[SZ]; char *kernel_read = NULL, *module_read = NULL; char *dump_write = NULL; int opt; @@ -1709,6 +1708,8 @@ int main(int argc, char **argv) err = 0; for (mod = modules; mod; mod = mod-next) { + char fname[strlen(mod-name) + 10]; + if (mod-skip) continue; - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC] [6/9] Implement namespace checking in modpost
This checks the namespaces at build time in modpost --- scripts/mod/modpost.c | 344 ++ 1 file changed, 317 insertions(+), 27 deletions(-) Index: linux/scripts/mod/modpost.c === --- linux.orig/scripts/mod/modpost.c +++ linux/scripts/mod/modpost.c @@ -1,8 +1,9 @@ -/* Postprocess module symbol versions +/* Postprocess module symbol versions and do various other module checks. * * Copyright 2003 Kai Germaschewski * Copyright 2002-2004 Rusty Russell, IBM Corporation * Copyright 2006 Sam Ravnborg + * Copyright 2007 Andi Kleen, SUSE Labs (changes licensed GPLv2 only) * Based in part on module-init-tools/depmod.c,file2alias * * This software may be used and distributed according to the terms @@ -12,9 +13,13 @@ */ #include ctype.h +#include assert.h #include modpost.h #include ../../include/linux/license.h +#define NS_SEPARATOR '.' +#define NS_SEPARATOR_STRING . + /* Are we using CONFIG_MODVERSIONS? */ int modversions = 0; /* Warn about undefined symbols? (do so if we have vmlinux) */ @@ -27,6 +32,9 @@ static int external_module = 0; static int vmlinux_section_warnings = 1; /* Only warn about unresolved symbols */ static int warn_unresolved = 0; +/* Fixing those would cause too many ifdefs -- off by default. 
*/ +static int warn_missing_modules = 0; + /* How a symbol is exported */ enum export { export_plain, export_unused, export_gpl, @@ -105,19 +113,43 @@ static struct module *find_module(char * return mod; } -static struct module *new_module(char *modname) +static const char *basename(const char *s) +{ + char *p = strrchr(s, '/'); + if (p) + return p + 1; + return s; +} + +static struct module *find_module_base(char *modname) { struct module *mod; - char *p, *s; - mod = NOFAIL(malloc(sizeof(*mod))); - memset(mod, 0, sizeof(*mod)); - p = NOFAIL(strdup(modname)); + for (mod = modules; mod; mod = mod-next) { + if (strcmp(basename(mod-name), modname) == 0) + break; + } + return mod; +} +static void strip_o(char *p) +{ + char *s; /* strip trailing .o */ if ((s = strrchr(p, '.')) != NULL) if (strcmp(s, .o) == 0) *s = '\0'; +} + +static struct module *new_module(char *modname) +{ + struct module *mod; + char *p; + + mod = NOFAIL(malloc(sizeof(*mod))); + memset(mod, 0, sizeof(*mod)); + p = NOFAIL(strdup(modname)); + strip_o(p); /* add to list */ mod-name = p; @@ -132,10 +164,12 @@ static struct module *new_module(char *m * struct symbol is also used for lists of unresolved symbols */ #define SYMBOL_HASH_SIZE 1024 +#define NSALLOW_HASH_SIZE 64 struct symbol { struct symbol *next; struct module *module; + const char *namespace; unsigned int crc; int crc_valid; unsigned int weak:1; @@ -147,10 +181,19 @@ struct symbol { char name[0]; }; +struct nsallow { + struct nsallow *next; + struct module *mod; + struct module *orig; + int ref; + char name[0]; +}; + static struct symbol *symbolhash[SYMBOL_HASH_SIZE]; +static struct nsallow *nsallowhash[NSALLOW_HASH_SIZE]; /* This is based on the hash agorithm from gdbm, via tdb */ -static inline unsigned int tdb_hash(const char *name) +static unsigned int tdb_hash(const char *name) { unsigned value; /* Used to compute the hash value. */ unsigned i; /* Used to cycle through random values. 
*/ @@ -192,21 +235,67 @@ static struct symbol *new_symbol(const c return new; } -static struct symbol *find_symbol(const char *name) +static struct symbol *find_symbol(const char *name, const char *ns) { - struct symbol *s; + struct symbol *s, *match; /* For our purposes, .foo matches foo. PPC64 needs this. */ if (name[0] == '.') name++; + match = NULL; for (s = symbolhash[tdb_hash(name) % SYMBOL_HASH_SIZE]; s; s=s-next) { + if (strcmp(s-name, name) == 0) { + match = s; + if (ns s-namespace strcmp(s-namespace, ns)) + continue; + return s; + } + } + return ns ? NULL : match; +} + +static struct nsallow *find_nsallow(const char *name, struct module *mod) +{ + struct nsallow *s; + + for (s = nsallowhash[tdb_hash(name)%NSALLOW_HASH_SIZE]; s; s=s-next) { + if (strcmp(s-name, name) == 0 s-mod == mod) + return s; + } + return NULL; +} + +static struct nsallow *find_nsallow_name(const char *name) +{ + struct nsallow *s; + + for (s = nsallowhash[tdb_hash(name)%NSALLOW_HASH_SIZE]; s;
[PATCH RFC] [3/9] modpost: Declare the modpost error functions as printf like
This way gcc can warn for wrong format strings --- scripts/mod/modpost.c |8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) Index: linux/scripts/mod/modpost.c === --- linux.orig/scripts/mod/modpost.c +++ linux/scripts/mod/modpost.c @@ -33,7 +33,9 @@ enum export { export_unused_gpl, export_gpl_future, export_unknown }; -void fatal(const char *fmt, ...) +#define PRINTF __attribute__ ((format (printf, 1, 2))) + +PRINTF void fatal(const char *fmt, ...) { va_list arglist; @@ -46,7 +48,7 @@ void fatal(const char *fmt, ...) exit(1); } -void warn(const char *fmt, ...) +PRINTF void warn(const char *fmt, ...) { va_list arglist; @@ -57,7 +59,7 @@ void warn(const char *fmt, ...) va_end(arglist); } -void merror(const char *fmt, ...) +PRINTF void merror(const char *fmt, ...) { va_list arglist; - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC] [9/9] Add a inet namespace
Shared by IP, IPv6, DCCP, UDPLITE, SCTP. The symbols used by tunnel modules weren't put into any name space because there are quite a lot of them. --- net/core/fib_rules.c|9 -- net/ipv4/af_inet.c | 52 net/ipv4/arp.c |1 net/ipv4/icmp.c | 10 +++ net/ipv4/inet_connection_sock.c | 40 +++--- net/ipv4/inet_diag.c|4 +-- net/ipv4/inet_hashtables.c |8 +++--- net/ipv4/inet_timewait_sock.c | 12 - net/ipv4/ip_input.c |2 - net/ipv4/ip_output.c|7 +++-- net/ipv4/ip_sockglue.c | 10 +++ 11 files changed, 86 insertions(+), 69 deletions(-) Index: linux/net/ipv4/af_inet.c === --- linux.orig/net/ipv4/af_inet.c +++ linux/net/ipv4/af_inet.c @@ -218,7 +218,7 @@ out: } u32 inet_ehash_secret __read_mostly; -EXPORT_SYMBOL(inet_ehash_secret); +EXPORT_SYMBOL_NS(inet, inet_ehash_secret); /* * inet_ehash_secret must be set exactly once @@ -235,7 +235,7 @@ void build_ehash_secret(void) inet_ehash_secret = rnd; spin_unlock_bh(inetsw_lock); } -EXPORT_SYMBOL(build_ehash_secret); +EXPORT_SYMBOL_NS(inet, build_ehash_secret); /* * Create an inet socket. @@ -1127,7 +1127,7 @@ int inet_sk_rebuild_header(struct sock * return err; } -EXPORT_SYMBOL(inet_sk_rebuild_header); +EXPORT_SYMBOL_NS(inet,inet_sk_rebuild_header); static int inet_gso_send_check(struct sk_buff *skb) { @@ -1235,6 +1235,8 @@ unsigned long snmp_fold_field(void *mib[ } return res; } +/* AK: Not in inet namespace because they're a generic facility. Probably + should be in another file though. 
*/ EXPORT_SYMBOL_GPL(snmp_fold_field); int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) @@ -1499,20 +1501,30 @@ static int __init ipv4_proc_init(void) MODULE_ALIAS_NETPROTO(PF_INET); -EXPORT_SYMBOL(inet_accept); -EXPORT_SYMBOL(inet_bind); -EXPORT_SYMBOL(inet_dgram_connect); -EXPORT_SYMBOL(inet_dgram_ops); -EXPORT_SYMBOL(inet_getname); -EXPORT_SYMBOL(inet_ioctl); -EXPORT_SYMBOL(inet_listen); -EXPORT_SYMBOL(inet_register_protosw); -EXPORT_SYMBOL(inet_release); -EXPORT_SYMBOL(inet_sendmsg); -EXPORT_SYMBOL(inet_shutdown); -EXPORT_SYMBOL(inet_sock_destruct); -EXPORT_SYMBOL(inet_stream_connect); -EXPORT_SYMBOL(inet_stream_ops); -EXPORT_SYMBOL(inet_unregister_protosw); -EXPORT_SYMBOL(net_statistics); -EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); +MODULE_NAMESPACE_ALLOW(inet, ipv6); +MODULE_NAMESPACE_ALLOW(inet, udplite); +MODULE_NAMESPACE_ALLOW(inet, dccp_ipv6); +MODULE_NAMESPACE_ALLOW(inet, dccp_ipv4); +MODULE_NAMESPACE_ALLOW(inet, dccp); +MODULE_NAMESPACE_ALLOW(inet, sctp); + +/* RED-PEN: would be better to fix wanrouter */ +MODULE_NAMESPACE_ALLOW(inet, wanrouter); + +EXPORT_SYMBOL_NS(inet,inet_accept); +EXPORT_SYMBOL_NS(inet,inet_bind); +EXPORT_SYMBOL_NS(inet,inet_dgram_connect); +EXPORT_SYMBOL_NS(inet,inet_dgram_ops); +EXPORT_SYMBOL_NS(inet,inet_getname); +EXPORT_SYMBOL_NS(inet,inet_ioctl); +EXPORT_SYMBOL_NS(inet,inet_listen); +EXPORT_SYMBOL_NS(inet,inet_register_protosw); +EXPORT_SYMBOL_NS(inet,inet_release); +EXPORT_SYMBOL_NS(inet,inet_sendmsg); +EXPORT_SYMBOL_NS(inet,inet_shutdown); +EXPORT_SYMBOL_NS(inet,inet_sock_destruct); +EXPORT_SYMBOL_NS(inet,inet_stream_connect); +EXPORT_SYMBOL_NS(inet,inet_stream_ops); +EXPORT_SYMBOL_NS(inet,inet_unregister_protosw); +EXPORT_SYMBOL_NS(inet,net_statistics); +EXPORT_SYMBOL_NS(inet,sysctl_ip_nonlocal_bind); Index: linux/net/ipv4/arp.c === --- linux.orig/net/ipv4/arp.c +++ linux/net/ipv4/arp.c @@ -1406,6 +1406,7 @@ static int __init arp_proc_init(void) #endif /* CONFIG_PROC_FS */ +/* No namespace because those are 
used by various drivers */ EXPORT_SYMBOL(arp_broken_ops); EXPORT_SYMBOL(arp_find); EXPORT_SYMBOL(arp_create); Index: linux/net/ipv4/icmp.c === --- linux.orig/net/ipv4/icmp.c +++ linux/net/ipv4/icmp.c @@ -1101,7 +1101,7 @@ void __init icmp_init(struct net_proto_f } } -EXPORT_SYMBOL(icmp_err_convert); -EXPORT_SYMBOL(icmp_send); -EXPORT_SYMBOL(icmp_statistics); -EXPORT_SYMBOL(xrlim_allow); +EXPORT_SYMBOL_NS(inet, icmp_err_convert); +EXPORT_SYMBOL_NS(inet, icmp_send); +EXPORT_SYMBOL_NS(inet, icmp_statistics); +EXPORT_SYMBOL_NS(inet, xrlim_allow); Index: linux/net/ipv4/inet_connection_sock.c === --- linux.orig/net/ipv4/inet_connection_sock.c +++ linux/net/ipv4/inet_connection_sock.c @@ -26,7 +26,7 @@ #ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = inet_csk BUG: unknown timer value\n; -EXPORT_SYMBOL(inet_csk_timer_bug_msg); +EXPORT_SYMBOL_NS(inet, inet_csk_timer_bug_msg); #endif /* @@ -73,7 +73,7 @@ int
[PATCH RFC] [8/9] Put UDP exports into a namespace
The UDP exports are only used by UDPv6 and UDP lite. They are internal functions not supposed to be used by anybody else. So turn them into a name space that only allows those. --- net/ipv4/udp.c | 27 +++ net/ipv4/udplite.c |6 +++--- 2 files changed, 18 insertions(+), 15 deletions(-) Index: linux/net/ipv4/udp.c === --- linux.orig/net/ipv4/udp.c +++ linux/net/ipv4/udp.c @@ -105,6 +105,9 @@ #include net/xfrm.h #include udp_impl.h +MODULE_NAMESPACE_ALLOW(udp, udplite); +MODULE_NAMESPACE_ALLOW(udp, ipv6); + /* * Snmp MIB for the UDP layer */ @@ -1641,18 +1644,18 @@ void udp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -EXPORT_SYMBOL(udp_disconnect); -EXPORT_SYMBOL(udp_hash); -EXPORT_SYMBOL(udp_hash_lock); -EXPORT_SYMBOL(udp_ioctl); -EXPORT_SYMBOL(udp_get_port); -EXPORT_SYMBOL(udp_prot); -EXPORT_SYMBOL(udp_sendmsg); -EXPORT_SYMBOL(udp_lib_getsockopt); -EXPORT_SYMBOL(udp_lib_setsockopt); -EXPORT_SYMBOL(udp_poll); +EXPORT_SYMBOL_NS(udp, udp_disconnect); +EXPORT_SYMBOL_NS(udp, udp_hash); +EXPORT_SYMBOL_NS(udp, udp_hash_lock); +EXPORT_SYMBOL_NS(udp, udp_ioctl); +EXPORT_SYMBOL_NS(udp, udp_get_port); +EXPORT_SYMBOL_NS(udp, udp_prot); +EXPORT_SYMBOL_NS(udp, udp_sendmsg); +EXPORT_SYMBOL_NS(udp, udp_lib_getsockopt); +EXPORT_SYMBOL_NS(udp, udp_lib_setsockopt); +EXPORT_SYMBOL_NS(udp, udp_poll); #ifdef CONFIG_PROC_FS -EXPORT_SYMBOL(udp_proc_register); -EXPORT_SYMBOL(udp_proc_unregister); +EXPORT_SYMBOL_NS(udp, udp_proc_register); +EXPORT_SYMBOL_NS(udp, udp_proc_unregister); #endif Index: linux/net/ipv4/udplite.c === --- linux.orig/net/ipv4/udplite.c +++ linux/net/ipv4/udplite.c @@ -113,6 +113,6 @@ out_register_err: printk(KERN_CRIT %s: Cannot add UDP-Lite protocol.\n, __FUNCTION__); } -EXPORT_SYMBOL(udplite_hash); -EXPORT_SYMBOL(udplite_prot); -EXPORT_SYMBOL(udplite_get_port); +EXPORT_SYMBOL_NS(udp, udplite_hash); +EXPORT_SYMBOL_NS(udp, udplite_prot); +EXPORT_SYMBOL_NS(udp, udplite_get_port); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a 
message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC] [7/9] Convert TCP exports into namespaces
I defined two namespaces: tcp for TCP internals which are only used by tcp_ipv6.ko, and tcpcong for exports used by the TCP congestion modules. No need to export any TCP internals to anybody else. So express this in a namespace. I admit I'm not 100% sure tcpcong makes sense -- there might be a legitimate need to have external out of tree congestion modules. They seem nearly like drivers, but only nearly. If that was deemed the case it would be possible to remove tcpcong again to allow everybody to access this. This implicitly turns all exports into GPL only, but that won't matter because all modules allowed to import TCP functions are GPLed. --- net/ipv4/tcp.c | 71 +++ net/ipv4/tcp_cong.c | 14 - net/ipv4/tcp_input.c | 12 +++ net/ipv4/tcp_ipv4.c | 38 - net/ipv4/tcp_minisocks.c | 12 +++ net/ipv4/tcp_output.c| 12 +++ net/ipv4/tcp_timer.c |2 - 7 files changed, 87 insertions(+), 74 deletions(-) Index: linux/net/ipv4/tcp.c === --- linux.orig/net/ipv4/tcp.c +++ linux/net/ipv4/tcp.c @@ -275,21 +275,21 @@ DEFINE_SNMP_STAT(struct tcp_mib, tcp_sta atomic_t tcp_orphan_count = ATOMIC_INIT(0); -EXPORT_SYMBOL_GPL(tcp_orphan_count); +EXPORT_SYMBOL_NS(tcp, tcp_orphan_count); int sysctl_tcp_mem[3] __read_mostly; int sysctl_tcp_wmem[3] __read_mostly; int sysctl_tcp_rmem[3] __read_mostly; -EXPORT_SYMBOL(sysctl_tcp_mem); -EXPORT_SYMBOL(sysctl_tcp_rmem); -EXPORT_SYMBOL(sysctl_tcp_wmem); +EXPORT_SYMBOL_NS(tcp, sysctl_tcp_mem); +EXPORT_SYMBOL_NS(tcp, sysctl_tcp_rmem); +EXPORT_SYMBOL_NS(tcp, sysctl_tcp_wmem); atomic_t tcp_memory_allocated; /* Current allocated memory. */ atomic_t tcp_sockets_allocated;/* Current number of TCP sockets. */ -EXPORT_SYMBOL(tcp_memory_allocated); -EXPORT_SYMBOL(tcp_sockets_allocated); +EXPORT_SYMBOL_NS(tcp, tcp_memory_allocated); +EXPORT_SYMBOL_NS(tcp, tcp_sockets_allocated); /* * Pressure flag: try to collapse.
@@ -299,7 +299,7 @@ EXPORT_SYMBOL(tcp_sockets_allocated); */ int tcp_memory_pressure __read_mostly; -EXPORT_SYMBOL(tcp_memory_pressure); +EXPORT_SYMBOL_NS(tcp, tcp_memory_pressure); void tcp_enter_memory_pressure(void) { @@ -309,7 +309,7 @@ void tcp_enter_memory_pressure(void) } } -EXPORT_SYMBOL(tcp_enter_memory_pressure); +EXPORT_SYMBOL_NS(tcp, tcp_enter_memory_pressure); /* * Wait for a TCP event. @@ -1995,7 +1995,7 @@ int compat_tcp_setsockopt(struct sock *s return do_tcp_setsockopt(sk, level, optname, optval, optlen); } -EXPORT_SYMBOL(compat_tcp_setsockopt); +EXPORT_SYMBOL_NS(tcp, compat_tcp_setsockopt); #endif /* Return information about state of tcp endpoint in API format. */ @@ -2061,7 +2061,7 @@ void tcp_get_info(struct sock *sk, struc info-tcpi_total_retrans = tp-total_retrans; } -EXPORT_SYMBOL_GPL(tcp_get_info); +EXPORT_SYMBOL_NS(tcp, tcp_get_info); static int do_tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) @@ -2174,7 +2174,7 @@ int compat_tcp_getsockopt(struct sock *s return do_tcp_getsockopt(sk, level, optname, optval, optlen); } -EXPORT_SYMBOL(compat_tcp_getsockopt); +EXPORT_SYMBOL_NS(tcp, compat_tcp_getsockopt); #endif struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) @@ -2262,7 +2262,7 @@ struct sk_buff *tcp_tso_segment(struct s out: return segs; } -EXPORT_SYMBOL(tcp_tso_segment); +EXPORT_SYMBOL_NS(tcp, tcp_tso_segment); #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; @@ -2298,7 +2298,7 @@ void tcp_free_md5sig_pool(void) __tcp_free_md5sig_pool(pool); } -EXPORT_SYMBOL(tcp_free_md5sig_pool); +EXPORT_SYMBOL_NS(tcp, tcp_free_md5sig_pool); static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void) { @@ -2371,7 +2371,7 @@ retry: return pool; } -EXPORT_SYMBOL(tcp_alloc_md5sig_pool); +EXPORT_SYMBOL_NS(tcp, tcp_alloc_md5sig_pool); struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) { @@ -2384,14 +2384,14 @@ struct tcp_md5sig_pool *__tcp_get_md5sig return (p ? 
*per_cpu_ptr(p, cpu) : NULL); } -EXPORT_SYMBOL(__tcp_get_md5sig_pool); +EXPORT_SYMBOL_NS(tcp, __tcp_get_md5sig_pool); void __tcp_put_md5sig_pool(void) { tcp_free_md5sig_pool(); } -EXPORT_SYMBOL(__tcp_put_md5sig_pool); +EXPORT_SYMBOL_NS(tcp, __tcp_put_md5sig_pool); #endif void tcp_done(struct sock *sk) @@ -2409,7 +2409,7 @@ void tcp_done(struct sock *sk) else inet_csk_destroy_sock(sk); } -EXPORT_SYMBOL_GPL(tcp_done); +EXPORT_SYMBOL_NS(tcp, tcp_done); extern void __skb_cb_too_small_for_tcp(int, int); extern struct tcp_congestion_ops tcp_reno; @@ -2524,15 +2524,28 @@ void __init tcp_init(void) tcp_register_congestion_control(tcp_reno); } -EXPORT_SYMBOL(tcp_close); -EXPORT_SYMBOL(tcp_disconnect);
Re: [RFC/PATCH] SO_NO_CHECK for IPv6
In article [EMAIL PROTECTED] (at Thu, 22 Nov 2007 10:34:03 +0800), Herbert Xu [EMAIL PROTECTED] says: On Wed, Nov 21, 2007 at 07:17:40PM -0500, Jeff Garzik wrote: For those interested, I am dealing with a UDP app that already does very strong checksumming and encryption, so additional software checksumming at the lower layers is quite simply a waste of CPU cycles. Hardware checksumming is fine, as long as its free. No matter how strong your underlying checksumming is it's not going to protect the IPv6 header is it :) In that sense, we should use AH. --yoshfuji - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module
Hi jamal, Good stuff. I have not read your paper - There are NICs out there (chelsio comes to mind) which claim to do pacing and have shown impressive numbers with TCP. Is your approach similar? Are there patents involved by some of these hardware vendors? (It would not be suprising if they exist). As far as I know, no. (I have not the details of chelsio NICs.) Pacing is a general idea, but our approach of implementation is a unique. PSPacer makes bursty traffic which is often generated by TCP smooth without any special hardware. The advantage with NICs is they have very good control of the timing (clock granularity being extremely important in cases like this) - what were your measurements based on i.e what clock source did you use on Linux? The key idea of PSPacer is to determine transmission timing of packets by the number of bytes transferred. If packets are transferred back to back, the timing a packet is sent can be determined by the number of bytes sent before the packet. PSPacer fills the gaps between time aligned real packets (the packets which are sent by user program) by gap packets. The real packets and gap packets are sent back to back, and thus the timing of transmission of each real packet can be precisely controlled by adjusting the gap packet size. As the gap packets, the IEEE 802.3x PAUSE frames are used. PAUSE frames are discarded at a switch input port, and only real packets go through the switch keeping the original intervals. In the past, some software-based pacing schemes have been proposed. These schemes use timer interrupt based packet transmission timing control. Therefore, to achieve precise pacing, they require the operating system to maintain a high resolution timer, which could incur a large overhead. Also, the idea of using a PAUSE frame to add gaps is interesting, but you should note that in linux a qdisc may be attached to any network device and this for example maybe a PPP device etc. What would you use for gaps in that case? 
I apologize if the answers are in your papers - i just glossed over. cheers, jamal Best regards, Ryousei Takano - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC] [1/9] Core module symbol namespaces code and intro.
On Thu, 22 Nov 2007 03:43:06 +0100 (CET) Andi Kleen [EMAIL PROTECTED] wrote: There seems to be rough consensus that the kernel currently has too many exported symbols. A lot of these exports are generally usable utility functions or important driver interfaces; but another large part are functions intended by only one or two very specific modules for a very specific purpose. One example is the TCP code. It has most of its internals exported, but only for use by tcp_ipv6.c (and now a few more by the TCP/IP congestion modules) But it doesn't make sense to include these exported for a specific module functions into a broader kernel interface. External modules assume they can use these functions, but they were never intended for that. This patch allows to export symbols only for specific modules by introducing symbol name spaces. A module name space has a white list of modules that are allowed to import symbols for it; all others can't use the symbols. It adds two new macros: MODULE_NAMESPACE_ALLOW(namespace, module); Allow module to import symbols from namespace. module is the module name without .ko as displayed by lsmod. Must be in the same module as the export (and be duplicated if there are multiple modules exporting symbols to a namespace). Multiple allows for the same name space are allowed. EXPORT_SYMBOL_NS(namespace, symbol); Hi, I like this concept in general; I have one minor comment; right now your namespace argument is like EXPORT_SYMBOL_NS(foo, some_symbol); from a language-like pov I kinda wonder if it's nicer to do EXPORT_SYMBOL_NS("foo", some_symbol); because foo isn't something in C scope, but more a string-like identifier... -- If you want to reach me at my work email, use [EMAIL PROTECTED] For development, discussion and tips for power savings, visit http://www.lesswatts.org - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH 1/3] NET_SCHED: PSPacer qdisc module
I am sorry, I sent an unfinished mail. Also, the idea of using a PAUSE frame to add gaps is interesting, but you should note that in linux a qdisc may be attached to any network device and this for example maybe a PPP device etc. What would you use for gaps in that case? You are right. PSPacer depends on the Ethernet, and it is not very good. Now I do not have any ideas for the other network devices. Does anyone have any ideas or suggestions? Best regards, Ryousei Takano - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC] [1/9] Core module symbol namespaces code and intro.
I like this concept in general; I have one minor comment; right now your namespace argument is like EXPORT_SYMBOL_NS(foo, some_symbol); from a language-like pov I kinda wonder if it's nicer to do EXPORT_SYMBOL_NS("foo", some_symbol); because foo isn't something in C scope, but more a string-like identifier... That wouldn't work for MODULE_ALLOW() because it appends the namespace to other identifiers. I don't know of a way in the C preprocessor to get back from a string to a ## concatenable identifier. For EXPORT_SYMBOL_NS it would be in theory possible, but making it asymmetric to MODULE_ALLOW would be ugly imho. -Andi - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC] [1/9] Core module symbol namespaces code and intro.
On Thu, Nov 22, 2007 at 03:43:06AM +0100, Andi Kleen wrote: There seems to be rough consensus that the kernel currently has too many exported symbols. A lot of these exports are generally usable utility functions or important driver interfaces; but another large part are functions intended by only one or two very specific modules for a very specific purpose. One example is the TCP code. It has most of its internals exported, but only for use by tcp_ipv6.c (and now a few more by the TCP/IP congestion modules) But it doesn't make sense to include these exported for a specific module functions into a broader kernel interface. External modules assume they can use these functions, but they were never intended for that. This patch allows to export symbols only for specific modules by introducing symbol name spaces. A module name space has a white list of modules that are allowed to import symbols for it; all others can't use the symbols. I really like this patchset. Definitely a step in the right direction imo. Looks like some nits there that checkpatch will probably pick up on, but otherwise, looks very straightforward too. Kudos. Dave -- http://www.codemonkey.org.uk - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC] [1/9] Core module symbol namespaces code and intro.
On Thursday 22 November 2007 13:43:06 Andi Kleen wrote: There seems to be rough consensus that the kernel currently has too many exported symbols. A lot of these exports are generally usable utility functions or important driver interfaces; but another large part are functions intended by only one or two very specific modules for a very specific purpose. Hi Andi, This is an interesting idea, thanks for the code! My only question is whether we can get most of this benefit by dropping the indirection of namespaces and have something like EXPORT_SYMBOL_TO(sym, modname)? It doesn't work so well for exporting to a group of modules, but that seems a reasonable line to draw anyway. Cheers, Rusty. PS. Probably better to use the standard warnx and errx in modpost, too. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [README] away until Dec 3rd
On Tue, Nov 20, 2007 at 08:29:21PM -0800, David Miller wrote: During this time Herbert Xu (CC:'d) will take care of both the net-2.6 stable tree and the net-2.6.25 devel tree. For this duration please use the net-2.6.25 tree at this location for basing your patches: git://git.kernel.org/pub/scm/linux/kernel/git/herbert/net-2.6.25.git/ Please note that this tree has already been rebased compared to Dave's net-2.6.25 tree. Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC][PATCH 0/3] PSPacer qdisc module
Hi all, I sent this mail yesterday, but it did not be delivered. So I resend it. I am sorry if you receive duplicate mails. What is PSPacer? PSPacer (Precise Software Pacer) is a qdisc module which realizes precise transmission bandwidth control. It makes bursty traffic which is often generated by TCP smooth without any special hardware. Bursty traffic can degrade the communication performance, because it causes buffer overflow at intermediate network nodes and results in packet losses. In a bursty traffic, packets are sent back to back. By adding a short pause in between the packets, traffic bursts can be avoided. PSPacer controls the interval between outgoing packets very precisely. The key idea of PSPacer is to determine transmission timing of packets by the number of bytes transferred. If packets are transferred back to back, the timing a packet is sent can be determined by the number of bytes sent before the packet. PSPacer fills the gaps between time aligned real packets (the packets which are sent by user program) by gap packets. The real packets and gap packets are sent back to back, and thus the timing of transmission of each real packet can be precisely controlled by adjusting the gap packet size. As the gap packets, the IEEE 802.3x PAUSE frames are used. PAUSE frames are discarded at a switch input port, and only real packets go through the switch keeping the original intervals. In the past, some software-based pacing schemes have been proposed. These schemes use timer interrupt based packet transmission timing control. Therefore, to achieve precise pacing, they require the operating system to maintain a high resolution timer, which could incur a large overhead. The patchset consists of two parts: one part is to be applied to the Linux kernel, and the other is to be applied to the iproute2. 
For detailed description and the usage of PSPacer, please refer to our project page (http://www.gridmpi.org/gridtcp.jsp), and the paper Design and Evaluation of Precise Software Pacing Mechanisms for Fast Long-Distance Networks, in PFLDnet2005. Usage - setup qdiscs (add the PSPacer qdisc as the root qdisc) # /sbin/tc qdisc add dev eth0 root handle 1: psp default 1 (add the PSPacer class whose target rate is 500Mbps) # /sbin/tc class add dev eth0 parent 1: classid 1:1 psp rate 500mbit (add the PFIFO qdisc as the sub qdisc) # /sbin/tc qdisc add dev eth0 parent 1:1 handle 10: pfifo - run iperf (to confirm the effect of PSPacer) $ iperf -c 192.168.1.2 -i 10 -t 60 Client connecting to 192.168.1.2, TCP port 5122 TCP window size: 16.0 KByte (default) iperf shows payload bandwidth. 476Mbps is the payload bandwidth when the physical layer bandwidth is 500Mbps and packet size is 1500Bytes [ 3] local 192.168.1.1 port 46457 connected with 192.168.1.2 port 5122 [ 3] 0.0-10.0 sec567 MBytes476 Mbits/sec [ 3] 10.0-20.0 sec567 MBytes476 Mbits/sec - cleanup qdiscs (remove the PFIFO sub qdisc) # /sbin/tc qdisc del dev eth0 parent 1:1 handle 10: (remove the PSPacer class) # /sbin/tc class del dev eth0 parent 1: classid 1:1 (remove the PSPacer qdisc) # /sbin/tc qdisc del dev eth0 root handle 1: (remove the PSPacer module) # /sbin/rmmod sch_psp Limitations (1) PSPacer controls the bandwidth according to the ratio of the target bandwidth in the maximum transmission bandwidth of the system. Therefore, the system (computer, network interface, operating system, buffer settings, etc.) should have a capability to transmit packets at the maximum transmission rate (i.e. 1 Gbps for 1000BASE, 100 Mbps for 100BASE) to realize a precise pacing. Therefore, if you want to control Gigabit Ethernet traffic, we recommend to use PCI-X, 66MHz/64bit PCI or CSA connected network interface. 
If the total of target bandwidth of the output streams is less than 100Mbps, you can set the network interface to use 100BASE mode so as to obtain precise pacing. For the same reason, avoid using a shared switch (dumb hub) for the edge switch to which the PC with PSPacer is connected. (2) PSPacer uses the IEEE 802.3x PAUSE frame as the gap between packets. Therefore, you can not use the PAUSE frame to stop transmission from the switch/router to the PC. Since PSPacer generates PAUSE frames with zero pause time, there should not be any side effects other than you can not stop transmission from the switch. However, it is recommended to disable IEEE 802.3x flow control function of the switch (to which a PC with PSPacer is connected) in order to avoid unexpected behavior. (3) PSPacer does not support TCP Segmentation Offloading (TSO). You have to disable TSO by using the ethtool command (ethtool -K eth0 tso off). Best regards,
[RFC][PATCH] bonding layer2+3 xmit_hash_policy
I posted this 2 weeks ago on the bonding-devel list, but didn't receive any feedback so thought I would bring it up to a larger audience. Included is a patch for a new xmit_hash_policy for the bonding driver that selects slaves based on MAC and IP information. This is a middle ground between what currently exists in the layer2 only policy and the layer3+4 policy. This policy strives to be fully 802.3ad compliant by transmitting every packet of any particular flow over the same link. As documented the layer3+4 policy is not fully compliant for extreme cases such as ip fragmentation, so this policy is a nice compromise for environments that require full compliance but desire more than the layer2 only policy. Comments? --- Documentation/networking/bonding.txt | 23 +++ drivers/net/bonding/bond_main.c | 26 -- include/linux/if_bonding.h |3 ++- 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 1134062..5ac84c0 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -587,6 +587,29 @@ xmit_hash_policy most UDP traffic is not involved in extended conversations. Other implementations of 802.3ad may or may not tolerate this noncompliance. + + layer2+3 + + This policy uses a combination of layer2 and layer3 + protocol information to generate the hash. + + Uses XOR of hardware MAC addresses and IP addresses to + generate the hash. The formula is + + (((source IP XOR dest IP) AND 0x) XOR + ( source MAC XOR destination MAC )) + modulo slave count + + This algorithm will place all traffic to a particular + network peer on the same slave. For non-IP traffic, + the formula is the same as for the layer2 transmit + hash policy. + + This policy is intended to provide a more balanced + distribution of traffic than layer2 alone, especially + in environments where a layer3 gateway device is + required to reach most destinations. 
This algorithm is + fully 802.3ad complient. The default value is layer2. This option was added in bonding version 2.6.3. In earlier versions of bonding, this parameter does diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 423298c..a731812 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -174,6 +174,7 @@ struct bond_parm_tbl bond_mode_tbl[] = { struct bond_parm_tbl xmit_hashtype_tbl[] = { { layer2, BOND_XMIT_POLICY_LAYER2}, { layer3+4, BOND_XMIT_POLICY_LAYER34}, +{ layer2+3, BOND_XMIT_POLICY_LAYER23}, { NULL, -1}, }; @@ -3604,6 +3605,24 @@ void bond_unregister_arp(struct bonding *bond) /* Hashing Policies -*/ /* + * Hash for the output device based upon layer 2 and layer 3 data. If + * the packet is not IP mimic bond_xmit_hash_policy_l2() + */ +static int bond_xmit_hash_policy_l23(struct sk_buff *skb, + struct net_device *bond_dev, int count) +{ + struct ethhdr *data = (struct ethhdr *)skb-data; + struct iphdr *iph = ip_hdr(skb); + + if (skb-protocol == __constant_htons(ETH_P_IP)) { + return ((ntohl(iph-saddr ^ iph-daddr) 0x) ^ + (data-h_dest[5] ^ bond_dev-dev_addr[5])) % count; + } + + return (data-h_dest[5] ^ bond_dev-dev_addr[5]) % count; +} + +/* * Hash for the output device based upon layer 3 and layer 4 data. If * the packet is a frag or not TCP or UDP, just use layer 3 data. 
If it is * altogether not IP, mimic bond_xmit_hash_policy_l2() @@ -4323,6 +4342,8 @@ void bond_set_mode_ops(struct bonding *bond, int mode) bond_dev-hard_start_xmit = bond_xmit_xor; if (bond-params.xmit_policy == BOND_XMIT_POLICY_LAYER34) bond-xmit_hash_policy = bond_xmit_hash_policy_l34; + else if (bond-params.xmit_policy == BOND_XMIT_POLICY_LAYER23) + bond-xmit_hash_policy = bond_xmit_hash_policy_l23; else bond-xmit_hash_policy = bond_xmit_hash_policy_l2; break; @@ -4334,6 +4355,8 @@ void bond_set_mode_ops(struct bonding *bond, int mode) bond_dev-hard_start_xmit = bond_3ad_xmit_xor; if (bond-params.xmit_policy == BOND_XMIT_POLICY_LAYER34) bond-xmit_hash_policy = bond_xmit_hash_policy_l34; + else if (bond-params.xmit_policy == BOND_XMIT_POLICY_LAYER23) +