[1/1] connector: add userspace example code into Documentation/connector/
Hello. I was asked several times to include userspace example code into Documentation, so if there is no policy against it, consider attached patch for 2.6.18. This program works with included Documentation/connector/cn_test.c connector module. Thank you. Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] --- /dev/null 2006-08-23 17:09:03.438578500 +0400 +++ ./Documentation/connector/ucon.c2006-08-25 11:06:57.0 +0400 @@ -0,0 +1,205 @@ +/* + * ucon.c + * + * Copyright (c) 2004+ Evgeniy Polyakov [EMAIL PROTECTED] + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include asm/types.h + +#include sys/types.h +#include sys/socket.h +#include sys/poll.h + +#include linux/netlink.h +#include linux/rtnetlink.h + +#include arpa/inet.h + +#include stdio.h +#include stdlib.h +#include unistd.h +#include string.h +#include errno.h +#include time.h + +#include linux/connector.h + +#define DEBUG +#define NETLINK_CONNECTOR 11 + +#ifdef DEBUG +#define ulog(f, a...) fprintf(stdout, f, ##a) +#else +#define ulog(f, a...) 
do {} while (0) +#endif + +static int need_exit; +static __u32 seq; + +static int netlink_send(int s, struct cn_msg *msg) +{ + struct nlmsghdr *nlh; + unsigned int size; + int err; + char buf[128]; + struct cn_msg *m; + + size = NLMSG_SPACE(sizeof(struct cn_msg) + msg-len); + + nlh = (struct nlmsghdr *)buf; + nlh-nlmsg_seq = seq++; + nlh-nlmsg_pid = getpid(); + nlh-nlmsg_type = NLMSG_DONE; + nlh-nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh)); + nlh-nlmsg_flags = 0; + + m = NLMSG_DATA(nlh); +#if 0 + ulog(%s: [%08x.%08x] len=%u, seq=%u, ack=%u.\n, + __func__, msg-id.idx, msg-id.val, msg-len, msg-seq, msg-ack); +#endif + memcpy(m, msg, sizeof(*m) + msg-len); + + err = send(s, nlh, size, 0); + if (err == -1) + ulog(Failed to send: %s [%d].\n, + strerror(errno), errno); + + return err; +} + +int main(int argc, char *argv[]) +{ + int s; + char buf[1024]; + int len; + struct nlmsghdr *reply; + struct sockaddr_nl l_local; + struct cn_msg *data; + FILE *out; + time_t tm; + struct pollfd pfd; + + if (argc 2) + out = stdout; + else { + out = fopen(argv[1], a+); + if (!out) { + ulog(Unable to open %s for writing: %s\n, + argv[1], strerror(errno)); + out = stdout; + } + } + + memset(buf, 0, sizeof(buf)); + + s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); + if (s == -1) { + perror(socket); + return -1; + } + + l_local.nl_family = AF_NETLINK; + l_local.nl_groups = 0x123; + l_local.nl_pid = 0; + + if (bind(s, (struct sockaddr *)l_local, sizeof(struct sockaddr_nl)) == -1) { + perror(bind); + close(s); + return -1; + } + + { + int on = l_local.nl_groups; + setsockopt(s, 270, 1, on, sizeof(on)); + } + + if (0) { + int i, j; + + memset(buf, 0, sizeof(buf)); + + data = (struct cn_msg *)buf; + + data-id.idx = 0x123; + data-id.val = 0x456; + data-seq = seq++; + data-ack = 0; + data-len = 0; + + for (j=0; j10; ++j) { + for (i=0; i1000; ++i) { + len = netlink_send(s, data); + } + + ulog(%d messages have been sent to %08x.%08x.\n, i, data-id.idx, data-id.val); + } + + return 0; + } + 
+ + pfd.fd = s; + + while (!need_exit) { + pfd.events = POLLIN; + pfd.revents = 0; + switch (poll(pfd, 1, -1)) { + case 0: + need_exit = 1; + break; + case -1: + if (errno != EINTR) { + need_exit
Re: [1/1] connector: add userspace example code into Documentation/connector/
Evgeniy Polyakov wrote: Hello. I was asked several times to include userspace example code into Documentation, so if there is no policy against it, consider attached patch for 2.6.18. This program works with included Documentation/connector/cn_test.c connector module. + l_local.nl_family = AF_NETLINK; + l_local.nl_groups = 0x123; + l_local.nl_pid = 0; + + if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) { + perror("bind"); + close(s); + return -1; + } + + { + int on = l_local.nl_groups; + setsockopt(s, 270, 1, &on, sizeof(on)); + } Example code shouldn't use magic numbers; please use the proper defines. And the code is wrong: using the same group number for bind (which takes a bitmask) and setsockopt (which takes a group number) doesn't work. It's not necessary to use setsockopt if you already used bind anyway. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [1/1] connector: add userspace example code into Documentation/connector/
On Fri, Aug 25, 2006 at 08:57:23AM +0200, Patrick McHardy ([EMAIL PROTECTED]) wrote: Evgeniy Polyakov wrote: Hello. I was asked several times to include userspace example code into Documentation, so if there is no policy against it, consider attached patch for 2.6.18. This program works with included Documentation/connector/cn_test.c connector module. + l_local.nl_family = AF_NETLINK; + l_local.nl_groups = 0x123; + l_local.nl_pid = 0; + + if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) { + perror("bind"); + close(s); + return -1; + } + + { + int on = l_local.nl_groups; + setsockopt(s, 270, 1, &on, sizeof(on)); + } Example code shouldn't use magic numbers; please use the proper defines. And the code is wrong: using the same group number for bind (which takes a bitmask) and setsockopt (which takes a group number) doesn't work. It's not necessary to use setsockopt if you already used bind anyway. I put the explicit socket option there to show how it works in case there are several group requests that are not placed into the initial bind call. But you are right that magic numbers are not that good. I will update the program with appropriate changes. Thank you. -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [1/1] connector: add userspace example code into Documentation/connector/
Hello. I was asked several times to include userspace example code into Documentation, so if there is no policy against it, consider attached patch for 2.6.18. This program works with included Documentation/connector/cn_test.c connector module. Thank you. Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] --- /dev/null 2006-08-23 17:09:03.438578500 +0400 +++ ./Documentation/connector/ucon.c2006-08-25 11:31:48.0 +0400 @@ -0,0 +1,206 @@ +/* + * ucon.c + * + * Copyright (c) 2004+ Evgeniy Polyakov [EMAIL PROTECTED] + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include asm/types.h + +#include sys/types.h +#include sys/socket.h +#include sys/poll.h + +#include linux/netlink.h +#include linux/rtnetlink.h + +#include arpa/inet.h + +#include stdio.h +#include stdlib.h +#include unistd.h +#include string.h +#include errno.h +#include time.h + +#include linux/connector.h + +#define DEBUG +#define NETLINK_CONNECTOR 11 + +#ifdef DEBUG +#define ulog(f, a...) fprintf(stdout, f, ##a) +#else +#define ulog(f, a...) 
do {} while (0) +#endif + +static int need_exit; +static __u32 seq; + +static int netlink_send(int s, struct cn_msg *msg) +{ + struct nlmsghdr *nlh; + unsigned int size; + int err; + char buf[128]; + struct cn_msg *m; + + size = NLMSG_SPACE(sizeof(struct cn_msg) + msg-len); + + nlh = (struct nlmsghdr *)buf; + nlh-nlmsg_seq = seq++; + nlh-nlmsg_pid = getpid(); + nlh-nlmsg_type = NLMSG_DONE; + nlh-nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh)); + nlh-nlmsg_flags = 0; + + m = NLMSG_DATA(nlh); +#if 0 + ulog(%s: [%08x.%08x] len=%u, seq=%u, ack=%u.\n, + __func__, msg-id.idx, msg-id.val, msg-len, msg-seq, msg-ack); +#endif + memcpy(m, msg, sizeof(*m) + msg-len); + + err = send(s, nlh, size, 0); + if (err == -1) + ulog(Failed to send: %s [%d].\n, + strerror(errno), errno); + + return err; +} + +int main(int argc, char *argv[]) +{ + int s; + char buf[1024]; + int len; + struct nlmsghdr *reply; + struct sockaddr_nl l_local; + struct cn_msg *data; + FILE *out; + time_t tm; + struct pollfd pfd; + + if (argc 2) + out = stdout; + else { + out = fopen(argv[1], a+); + if (!out) { + ulog(Unable to open %s for writing: %s\n, + argv[1], strerror(errno)); + out = stdout; + } + } + + memset(buf, 0, sizeof(buf)); + + s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); + if (s == -1) { + perror(socket); + return -1; + } + + l_local.nl_family = AF_NETLINK; + l_local.nl_groups = 0x123; /* bitmask of requested groups */ + l_local.nl_pid = 0; + + if (bind(s, (struct sockaddr *)l_local, sizeof(struct sockaddr_nl)) == -1) { + perror(bind); + close(s); + return -1; + } + +#if 0 + { + int on = 0x57; /* Additional group number */ + setsockopt(s, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, on, sizeof(on)); + } +#endif + if (0) { + int i, j; + + memset(buf, 0, sizeof(buf)); + + data = (struct cn_msg *)buf; + + data-id.idx = 0x123; + data-id.val = 0x456; + data-seq = seq++; + data-ack = 0; + data-len = 0; + + for (j=0; j10; ++j) { + for (i=0; i1000; ++i) { + len = netlink_send(s, data); + } + + ulog(%d 
messages have been sent to %08x.%08x.\n, i, data-id.idx, data-id.val); + } + + return 0; + } + + + pfd.fd = s; + + while (!need_exit) { + pfd.events = POLLIN; + pfd.revents = 0; + switch (poll(pfd, 1, -1)) { + case 0: + need_exit = 1; + break; + case -1: +
Re: [1/1] connector: add userspace example code into Documentation/connector/
Evgeniy Polyakov wrote: + l_local.nl_family = AF_NETLINK; + l_local.nl_groups = 0x123; /* bitmask of requested groups */ + l_local.nl_pid = 0; + + if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) { + perror("bind"); + close(s); + return -1; + } + +#if 0 + { + int on = 0x57; /* Additional group number */ + setsockopt(s, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &on, sizeof(on)); + } +#endif That looks better, thanks. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[GIT PATCH] IPv6 Updates for net-2.6.19
Hello. Please pull git://git.skbuff.net/gitroot/yoshfuji/net-2.6.19-20060825-inet6 for the following updates on top of the net-2.6.19 tree. Regards, HEADLINES - [IPV6] MIP6: Several obvious clean-ups. [IPV6] ROUTE: Routing by Traffic Class. [IPV6] ROUTE: Routing by FWMARK. [NET]: Add common helper functions to convert IPv6/IPv4 address string to network address structure. [NETFILTER] NF_CONNTRACK_FTP: Use in6_pton() to convert address string. DIFFSTAT include/linux/fib_rules.h|2 include/linux/inet.h |2 include/net/flow.h |2 net/core/utils.c | 215 ++ net/ipv6/Kconfig |7 + net/ipv6/ah6.c | 45 +--- net/ipv6/exthdrs.c |1 net/ipv6/fib6_rules.c| 26 + net/ipv6/mip6.c |6 + net/ipv6/route.c |1 net/netfilter/nf_conntrack_ftp.c | 96 + 11 files changed, 268 insertions(+), 135 deletions(-) CHANGESETS -- commit 6dabb77fd82cd927727d5fb8136eff2e123910f5 Author: YOSHIFUJI Hideaki [EMAIL PROTECTED] Date: Thu Aug 24 23:18:12 2006 +0900 [IPV6] MIP6: Several obvious clean-ups. - Remove redundant code. Pointed out by Brian Haley [EMAIL PROTECTED]. - Unify code paths with/without CONFIG_IPV6_MIP. - Use NIP6_FMT for IPv6 address textual presentation. - Fold long line. Pointed out by David Miller [EMAIL PROTECTED]. 
Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED] diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 164546b..9b007eb 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -128,9 +128,7 @@ static void ipv6_rearrange_destopt(struc off += optlen; len -= optlen; } - if (len == 0) - return; - + /* Note: ok if len == 0 */ bad: return; } @@ -175,11 +173,7 @@ static void ipv6_rearrange_rthdr(struct ipv6_addr_copy(iph-daddr, final_addr); } -#ifdef CONFIG_IPV6_MIP6 static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) -#else -static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) -#endif { union { struct ipv6hdr *iph; @@ -194,30 +188,12 @@ #endif while (exthdr.raw end) { switch (nexthdr) { -#ifdef CONFIG_IPV6_MIP6 - case NEXTHDR_HOP: - if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG( - KERN_WARNING overrun %sopts\n, - nexthdr == NEXTHDR_HOP ? - hop : dest); - return -EINVAL; - } - break; case NEXTHDR_DEST: +#ifdef CONFIG_IPV6_MIP6 if (dir == XFRM_POLICY_OUT) ipv6_rearrange_destopt(iph, exthdr.opth); - if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG( - KERN_WARNING overrun %sopts\n, - nexthdr == NEXTHDR_HOP ? - hop : dest); - return -EINVAL; - } - break; -#else +#endif case NEXTHDR_HOP: - case NEXTHDR_DEST: if (!zero_out_mutable_opts(exthdr.opth)) { LIMIT_NETDEBUG( KERN_WARNING overrun %sopts\n, @@ -226,7 +202,6 @@ #else return -EINVAL; } break; -#endif case NEXTHDR_ROUTING: ipv6_rearrange_rthdr(iph, exthdr.rth); @@ -282,16 +257,13 @@ #endif } #ifdef CONFIG_IPV6_MIP6 memcpy(tmp_ext, top_iph-saddr, extlen); - err = ipv6_clear_mutable_options(top_iph, -extlen - sizeof(*tmp_ext) + -sizeof(*top_iph), -XFRM_POLICY_OUT); #else memcpy(tmp_ext, top_iph-daddr, extlen); +#endif err = ipv6_clear_mutable_options(top_iph, extlen - sizeof(*tmp_ext) + -sizeof(*top_iph)); -#endif +sizeof(*top_iph), +XFRM_POLICY_OUT); if (err) goto error_free_iph; } @@ -382,13 +354,8 @@ static int ah6_input(struct xfrm_state * if (!tmp_hdr
Re: [PATCH] tcp_lp: use BUILD_BUG_ON
From: Alexey Dobriyan [EMAIL PROTECTED] Date: Fri, 25 Aug 2006 05:08:31 +0400 Signed-off-by: Alexey Dobriyan [EMAIL PROTECTED] Applied. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] tcp_bic: use BUILD_BUG_ON
From: Alexey Dobriyan [EMAIL PROTECTED] Date: Fri, 25 Aug 2006 05:08:02 +0400 Signed-off-by: Alexey Dobriyan [EMAIL PROTECTED] Applied. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [1/1] connector: add userspace example code into Documentation/connector/
From: Evgeniy Polyakov [EMAIL PROTECTED] Date: Fri, 25 Aug 2006 11:11:02 +0400 Hello. I was asked several times to include userspace example code into Documentation, so if there is no policy against it, consider attached patch for 2.6.18. This program works with included Documentation/connector/cn_test.c connector module. Thank you. Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] Fair enough, applied (after killing all of the trailing whitespace). Thanks. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [1/1] connector: add userspace example code into Documentation/connector/
On Fri, Aug 25, 2006 at 12:52:33AM -0700, David Miller ([EMAIL PROTECTED]) wrote: From: Evgeniy Polyakov [EMAIL PROTECTED] Date: Fri, 25 Aug 2006 11:11:02 +0400 Hello. I was asked several times to include userspace example code into Documentation, so if there is no policy against it, consider attached patch for 2.6.18. This program works with included Documentation/connector/cn_test.c connector module. Thank you. Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] Fair enough, applied (after killing all of the trailing whitespace). I mailed it myself and applied to 2.6.18 git tree - patch -p1 did not complain for sure :) Thank you. -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [1/1] connector: add userspace example code into Documentation/connector/
From: Evgeniy Polyakov [EMAIL PROTECTED] Date: Fri, 25 Aug 2006 12:15:03 +0400 I mailed it myself and applied to 2.6.18 git tree - patch -p1 did not complain for sure :) GIT always complains very loudly about any trailing whitespace on any lines, patch is too dumb to do that. You do not need to use GIT trees to check this, just run: git apply --check --whitespace=error-all $PATCH and it will let you know. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PATCH] IPv6 Updates for net-2.6.19
In article [EMAIL PROTECTED] (at Fri, 25 Aug 2006 03:00:34 +0900 (JST)), YOSHIFUJI Hideaki [EMAIL PROTECTED] says: Please pull git://git.skbuff.net/gitroot/yoshfuji/net-2.6.19-20060825-inet6 +int in6_pton(const char *src, int srclen, : + printf(srclen=%d\n, srclen); (FYI, Dave has pointed out and fixed up this this but anyway,) My fault... Here's the updated version. It is available at the same URL. I'll be more careful in the future. HEADLINES - [IPV6] MIP6: Several obvious clean-ups. [IPV6] ROUTE: Routing by Traffic Class. [IPV6] ROUTE: Routing by FWMARK. [NET]: Add common helper functions to convert IPv6/IPv4 address string to network address structure. [NETFILTER] NF_CONNTRACK_FTP: Use in6_pton() to convert address string. DIFFSTAT include/linux/fib_rules.h|2 include/linux/inet.h |2 include/net/flow.h |2 net/core/utils.c | 213 ++ net/ipv6/Kconfig |7 + net/ipv6/ah6.c | 45 +--- net/ipv6/exthdrs.c |1 net/ipv6/fib6_rules.c| 26 + net/ipv6/mip6.c |6 + net/ipv6/route.c |1 net/netfilter/nf_conntrack_ftp.c | 96 + 11 files changed, 266 insertions(+), 135 deletions(-) CHANGESETS -- commit 6dabb77fd82cd927727d5fb8136eff2e123910f5 Author: YOSHIFUJI Hideaki [EMAIL PROTECTED] Date: Thu Aug 24 23:18:12 2006 +0900 [IPV6] MIP6: Several obvious clean-ups. - Remove redundant code. Pointed out by Brian Haley [EMAIL PROTECTED]. - Unify code paths with/without CONFIG_IPV6_MIP. - Use NIP6_FMT for IPv6 address textual presentation. - Fold long line. Pointed out by David Miller [EMAIL PROTECTED]. 
Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED] diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 164546b..9b007eb 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -128,9 +128,7 @@ static void ipv6_rearrange_destopt(struc off += optlen; len -= optlen; } - if (len == 0) - return; - + /* Note: ok if len == 0 */ bad: return; } @@ -175,11 +173,7 @@ static void ipv6_rearrange_rthdr(struct ipv6_addr_copy(iph-daddr, final_addr); } -#ifdef CONFIG_IPV6_MIP6 static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) -#else -static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) -#endif { union { struct ipv6hdr *iph; @@ -194,30 +188,12 @@ #endif while (exthdr.raw end) { switch (nexthdr) { -#ifdef CONFIG_IPV6_MIP6 - case NEXTHDR_HOP: - if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG( - KERN_WARNING overrun %sopts\n, - nexthdr == NEXTHDR_HOP ? - hop : dest); - return -EINVAL; - } - break; case NEXTHDR_DEST: +#ifdef CONFIG_IPV6_MIP6 if (dir == XFRM_POLICY_OUT) ipv6_rearrange_destopt(iph, exthdr.opth); - if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG( - KERN_WARNING overrun %sopts\n, - nexthdr == NEXTHDR_HOP ? - hop : dest); - return -EINVAL; - } - break; -#else +#endif case NEXTHDR_HOP: - case NEXTHDR_DEST: if (!zero_out_mutable_opts(exthdr.opth)) { LIMIT_NETDEBUG( KERN_WARNING overrun %sopts\n, @@ -226,7 +202,6 @@ #else return -EINVAL; } break; -#endif case NEXTHDR_ROUTING: ipv6_rearrange_rthdr(iph, exthdr.rth); @@ -282,16 +257,13 @@ #endif } #ifdef CONFIG_IPV6_MIP6 memcpy(tmp_ext, top_iph-saddr, extlen); - err = ipv6_clear_mutable_options(top_iph, -extlen - sizeof(*tmp_ext) + -sizeof(*top_iph), -XFRM_POLICY_OUT); #else memcpy(tmp_ext, top_iph-daddr, extlen); +#endif err = ipv6_clear_mutable_options(top_iph, extlen - sizeof(*tmp_ext) + -sizeof(*top_iph)); -#endif
Re: [1/1] connector: add userspace example code into Documentation/connector/
On Fri, Aug 25, 2006 at 01:17:27AM -0700, David Miller ([EMAIL PROTECTED]) wrote: I mailed it myself and applied to 2.6.18 git tree - patch -p1 did not complain for sure :) GIT always complains very loudly about any trailing whitespace on any lines, patch is too dumb to do that. You do not need to use GIT trees to check this, just run: git apply --check --whitespace=error-all $PATCH and it will let you know. Hmm, how many interesting things git contains... I will definitely use this feature, thanks David. -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2]d80211: fix wpa_supplicant reassoc problem
After key negotiation completed using wpa_supplicant, wpa_supplicant can't reassoc with the AP if we reboot the AP. It always fails at the 4-way handshake. The problem is the key info is not cleared correctly. Thus when wpa_supplicant send the EAPOL-KEY packet, the d80211 stack finds the old key and uses it to encrypt the packet. The patch removes the sta_info when we disassociate with AP. Thanks, Hong diff --git a/net/d80211/ieee80211_sta.c b/net/d80211/ieee80211_sta.c index 8caf352..2144b34 100644 --- a/net/d80211/ieee80211_sta.c +++ b/net/d80211/ieee80211_sta.c @@ -739,6 +739,14 @@ static void ieee80211_associated(struct wireless_send_event(dev, SIOCGIWAP, wrqu, NULL); mod_timer(ifsta-timer, jiffies + IEEE80211_MONITORING_INTERVAL + 30 * HZ); + + sta = sta_info_get(local, ifsta-bssid); + if (sta) { + sta_info_free(sta, 0); + sta_info_put(sta); + } + + ifsta-probereq_poll = 0; } else { mod_timer(ifsta-timer, jiffies + IEEE80211_MONITORING_INTERVAL); diff --git a/net/d80211/sta_info.c b/net/d80211/sta_info.c index 7f5febe..8902816 100644 --- a/net/d80211/sta_info.c +++ b/net/d80211/sta_info.c @@ -197,6 +197,12 @@ #ifdef CONFIG_D80211_VERBOSE_DEBUG local-mdev-name, MAC_ARG(sta-addr)); #endif /* CONFIG_D80211_VERBOSE_DEBUG */ + if (sta-key) { + ieee80211_key_sysfs_remove(sta-key); + ieee80211_key_free(sta-key); + sta-key = NULL; + } + rate_control_remove_sta_attrs(local, sta-rate_ctrl_priv, sta-kobj); ieee80211_sta_sysfs_remove(sta); @@ -244,8 +250,6 @@ void sta_info_free(struct sta_info *sta, kfree(key); } } - ieee80211_key_free(sta-key); - sta-key = NULL; } else if (sta-key_idx_compression != HW_KEY_IDX_INVALID) { struct ieee80211_key_conf conf; memset(conf, 0, sizeof(conf));
[PATCH 2/2]d80211: add hardware scan callback
Add hardware scan callback to support cards like ipw3945 which implements the scan command in firmware. Thanks, Hong diff --git a/include/net/d80211.h b/include/net/d80211.h index ba5cb4c..b369d12 100644 --- a/include/net/d80211.h +++ b/include/net/d80211.h @@ -595,6 +595,10 @@ struct ieee80211_hw { int (*passive_scan)(struct net_device *dev, int state, struct ieee80211_scan_conf *conf); + /* Ask the hardware to service the scan request, no need to start + * the scan state machine in stack. */ + int (*hw_scan)(struct net_device *dev, u8 *ssid, size_t len); + /* return low-level statistics */ int (*get_stats)(struct net_device *dev, struct ieee80211_low_level_stats *stats); @@ -893,6 +897,8 @@ void ieee80211_tx_led(int state, struct */ void ieee80211_rx_led(int state, struct net_device *dev); +/* set station scan completed */ +void ieee80211_set_scan_completed(struct net_device *dev); /* IEEE 802.11 defines */ diff --git a/net/d80211/ieee80211.c b/net/d80211/ieee80211.c index 60eca90..dc920c1 100644 --- a/net/d80211/ieee80211.c +++ b/net/d80211/ieee80211.c @@ -4831,6 +4831,7 @@ EXPORT_SYMBOL(sta_info_get); EXPORT_SYMBOL(sta_info_put); EXPORT_SYMBOL(ieee80211_radar_status); EXPORT_SYMBOL(ieee80211_get_mc_list_item); +EXPORT_SYMBOL(ieee80211_set_scan_completed); module_init(ieee80211_init); module_exit(ieee80211_exit); diff --git a/net/d80211/ieee80211_sta.c b/net/d80211/ieee80211_sta.c index 2144b34..4bb2234 100644 --- a/net/d80211/ieee80211_sta.c +++ b/net/d80211/ieee80211_sta.c @@ -2426,6 +2426,28 @@ static int ieee80211_active_scan(struct } +void ieee80211_set_scan_completed(struct net_device *dev) +{ + struct ieee80211_local *local = dev-ieee80211_ptr; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + union iwreq_data wrqu; + + printk(KERN_DEBUG %s: scan completed\n, dev-name); + local-sta_scanning = 0; + local-last_scan_completed = jiffies; + + memset(wrqu, 0, sizeof(wrqu)); + wireless_send_event(dev, SIOCGIWSCAN, wrqu, NULL); + + if 
(sdata-type == IEEE80211_IF_TYPE_IBSS) { + struct ieee80211_if_sta *ifsta = sdata-u.sta; + if (!ifsta-bssid_set || + (!ifsta-state == IEEE80211_IBSS_JOINED + !ieee80211_sta_active_ibss(dev))) + ieee80211_sta_find_ibss(dev, ifsta); + } +} + static void ieee80211_sta_scan_work(void *ptr) { struct net_device *dev = ptr; @@ -2434,7 +2456,6 @@ static void ieee80211_sta_scan_work(void struct ieee80211_hw_modes *mode; struct ieee80211_channel *chan; int skip; - union iwreq_data wrqu; unsigned long next_delay = 0; if (!local-sta_scanning) @@ -2451,20 +2472,8 @@ static void ieee80211_sta_scan_work(void operational channel after scan\n, dev-name); } - printk(KERN_DEBUG %s: scan completed\n, dev-name); - local-sta_scanning = 0; - local-last_scan_completed = jiffies; - memset(wrqu, 0, sizeof(wrqu)); - wireless_send_event(dev, SIOCGIWSCAN, wrqu, NULL); - if (sdata-type == IEEE80211_IF_TYPE_IBSS) { -struct ieee80211_sub_if_data *sdata = - IEEE80211_DEV_TO_SUB_IF(dev); -struct ieee80211_if_sta *ifsta = sdata-u.sta; -if (!ifsta-bssid_set || -(ifsta-state == IEEE80211_IBSS_JOINED - !ieee80211_sta_active_ibss(dev))) - ieee80211_sta_find_ibss(dev, ifsta); - } + + ieee80211_set_scan_completed(dev); return; } skip = !(local-enabled_modes (1 mode-mode)); @@ -2565,9 +2574,12 @@ int ieee80211_sta_req_scan(struct net_de printk(KERN_DEBUG %s: starting scan\n, dev-name); + local-sta_scanning = 1; + if (local-hw-hw_scan) + return local-hw-hw_scan(dev, ssid, ssid_len); + ieee80211_sta_save_oper_chan(dev); - local-sta_scanning = 1; /* TODO: stop TX queue? */ if (ssid) {
Re: [RFC] add nl80211
On Thu, 2006-08-24 at 19:27 +0200, Thomas Graf wrote: I'd use normal u32 attributes here as well and simply enumerate their type 1..n. int idx = 1 list_for_each_entry(drv, nl80211_drv_list, list) NLA_PUT_U32(msg, idx++, drv-wiphy); The additional header seems waste but this way you stay flexible and can extend the protocol later on. Attribute lengths are checked with an open end in mind, i.e. you can put more stuff behind that u32 in the future and your old applications will still work. You also might want to consider returning ifindex and the associated name. That'd be a list of ifindexes again... +static int nl80211_get_intfs(struct sk_buff *skb, struct genl_info *info) Try not to reuse the same attribute type for different purposes, it will force you to duplicate the validation policy for every single command and things become very error prone. I completely reworked that now so it will: * create a nested NL80211_ATTR_INTERFACE_LIST with nested { * 1..N attributes, with nested { * ATTR_IFINDEX and * ATTR_IFNAME } } how does that sound? Maybe I should do the same for the WIPHY list? i.e. create a new type ATTR_WIPHY_LIST and within that nest numbered attributes (array indexes) and in there put ATTR_WIPHY? So possibly I could also put ATTR_INTERFACE_LIST in there as well later? johannes - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 44/44] [XFRM] IPV6: Support Mobile IPv6 extension headers sorting.
Masahide NAKAMURA wrote: David Miller wrote: From: Masahide NAKAMURA [EMAIL PROTECTED] Date: Thu, 24 Aug 2006 16:05:39 +0900 David Miller wrote: In the mean time, I will work on porting my XFRM hashing changes for the current net-2.6.19 tree. FYI, your work will not have any conflict with the left of MIPv6 patches which I will describe later since they are almost out of XFRM. Great. I just finished the port and pushed all of that work to net-2.6.19, can folks please take a look? I tested IPSEC as best as I could with XFRM_SUB_POLICY enabled, but I have no way currently to test sub-policies or MIPV6 cases. OK, I will review it and also start my XFRM test with net-2.6.19 as I've done with my tree. I've found a problem about MIPv6 CN with the patch below. commit 02b0fa84daaa70f035767c9a5a0d539667249e60 Author: David S. Miller [EMAIL PROTECTED] Date: Thu Aug 24 04:45:07 2006 -0700 [XFRM]: Hash policies when non-prefixed. It seems that the policy hashing is not always used with selector protocol. It may conflict with the MIPL daemon, though. Let me explain the detail: MIPv6 specification says that all mobility header(MH) must be sent without routing header type 2(RT2) / home address option, except [*1]. To satisfy it MIPL daemon uses some bypass policies. For CN outbound example(ip command output): (a)MIPL daemon adds MH bypass policy when it starts to run: src ::/0 dst ::/0 proto 135 dir out priority 12 ptype sub (b)After binding is accepted, it also adds route optimization policy to send user traffic with RT2: src 3ffe:501::100::/128 dst 3ffe:501::101::/128 dir out priority 16 ptype sub tmpl src :: dst :: proto route2 reqid 0 mode ro level use When the daemon had added both policies, we expected that (a) would be used for all MH and (b) otherwise, because of the priority order. But the kernel used (b) when the daemon sent MH from 3ffe:501::100:: to 3ffe:501::101::. Note: such bypasses are also required for ICMPv6 error and neighbor discovery. 
(*1: Binding update(BU) can be sent with home address option and binding ack(BA) can be sent with RT2.) Do you have any ideas? Thanks, -- Masahide NAKAMURA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 44/44] [XFRM] IPV6: Support Mobile IPv6 extension headers sorting.
From: Masahide NAKAMURA [EMAIL PROTECTED] Date: Fri, 25 Aug 2006 19:06:40 +0900 I've found a problem about MIPv6 CN with the patch below. We just need to search by priority in the inexact list, even if we get a hit in the hash table. The fix is trivial, please try this patch: diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 200e6e5..060f115 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -908,6 +908,7 @@ static struct xfrm_policy *xfrm_policy_l xfrm_address_t *daddr, *saddr; struct hlist_node *entry; struct hlist_head *chain; + u32 priority = ~0U; daddr = xfrm_flowi_daddr(fl, family); saddr = xfrm_flowi_saddr(fl, family); @@ -919,21 +920,21 @@ static struct xfrm_policy *xfrm_policy_l ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { if (xfrm_policy_match(pol, fl, type, family, dir)) { - xfrm_pol_hold(pol); ret = pol; + priority = ret-priority; break; } } - if (!ret) { - chain = xfrm_policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { - if (xfrm_policy_match(pol, fl, type, family, dir)) { - xfrm_pol_hold(pol); - ret = pol; - break; - } + chain = xfrm_policy_inexact[dir]; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_policy_match(pol, fl, type, family, dir) + pol-priority priority) { + ret = pol; + break; } } + if (ret) + xfrm_pol_hold(ret); read_unlock_bh(xfrm_policy_lock); return ret; - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[NET_SCHED]: Add mask support to fwmark classifier
This patch adds support to mask the nfmark value before the lookup in the fw classifier. Unfortunately it has some drawbacks, so I'd be interested if anyone can think of a better way. The problem is that in order to avoid walking through all filters contained in one instance, we need to mask the value before the lookup. This means all filters share the same mask, which is taken from the first filter created and stored in the filter head. The user interface however always refers to a single filter, not the head, so it can't be changed afterwards unless we just overwrite it whenever a new filter is installed. Neither option is really perfect. The current patch doesn't allow changing the mask and enforces that all filters use the same one, which I think is better than allowing inconsistent configurations. Any better ideas? [NET_SCHED]: Add mask support to fwmark classifier Support masking the nfmark value before the search. The mask value is global for all filters contained in one instance. It can only be set when a new instance is created; all filters must specify the same mask. 
Signed-off-by: Patrick McHardy [EMAIL PROTECTED] --- commit c7dff54dc2dca206ff54f66bfce290c49f98a3c8 tree 88d48096f13674f29413dc5c9853c7d0a8c5feac parent e5d8ce21a2261f73b078d802bd2ab3508153b177 author Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 12:03:19 +0200 committer Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 12:03:19 +0200 include/linux/pkt_cls.h |1 + net/sched/cls_fw.c | 25 - 2 files changed, 25 insertions(+), 1 deletions(-) diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index bd2c5a2..c3f01b3 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -305,6 +305,7 @@ enum TCA_FW_POLICE, TCA_FW_INDEV, /* used by CONFIG_NET_CLS_IND */ TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */ + TCA_FW_MASK, __TCA_FW_MAX }; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index e6973d9..c9385dc 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -50,6 +50,7 @@ #define HTSIZE (PAGE_SIZE/sizeof(struct struct fw_head { struct fw_filter *ht[HTSIZE]; + u32 mask; }; struct fw_filter @@ -101,7 +102,7 @@ static int fw_classify(struct sk_buff *s struct fw_filter *f; int r; #ifdef CONFIG_NETFILTER - u32 id = skb-nfmark; + u32 id = skb-nfmark head-mask; #else u32 id = 0; #endif @@ -209,7 +210,9 @@ static int fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, struct rtattr **tb, struct rtattr **tca, unsigned long base) { + struct fw_head *head = (struct fw_head*)tp-root; struct tcf_exts e; + u32 mask; int err; err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], e, fw_ext_map); @@ -232,6 +235,15 @@ #ifdef CONFIG_NET_CLS_IND } #endif /* CONFIG_NET_CLS_IND */ + if (tb[TCA_FW_MASK-1]) { + if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) + goto errout; + mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); + if (mask != head-mask) + goto errout; + } else if (head-mask != 0x) + goto errout; + tcf_exts_change(tp, f-exts, e); return 0; @@ -267,9 +279,17 @@ static int fw_change(struct tcf_proto *t return -EINVAL; if (head == NULL) { + u32 mask = 
0x; + if (tb[TCA_FW_MASK-1]) { + if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) + return -EINVAL; + mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); + } + head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; + head-mask = mask; tcf_tree_lock(tp); tp-root = head; @@ -330,6 +350,7 @@ static void fw_walk(struct tcf_proto *tp static int fw_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { + struct fw_head *head = (struct fw_head *)tp-root; struct fw_filter *f = (struct fw_filter*)fh; unsigned char*b = skb-tail; struct rtattr *rta; @@ -351,6 +372,8 @@ #ifdef CONFIG_NET_CLS_IND if (strlen(f-indev)) RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f-indev); #endif /* CONFIG_NET_CLS_IND */ + if (head-mask != 0x) + RTA_PUT(skb, TCA_FW_MASK, 4, head-mask); if (tcf_exts_dump(skb, f-exts, fw_ext_map) 0) goto rtattr_failure;
Re: [RFC] add nl80211
* Johannes Berg [EMAIL PROTECTED] 2006-08-25 11:04 I completely reworked that now so it will: * create a nested NL80211_ATTR_INTERFACE_LIST with nested { * 1..N attributes, with nested { * ATTR_IFINDEX and * ATTR_IFNAME } } how does that sound? Maybe I should do the same for the WIPHY list? i.e. create a new type ATTR_WIPHY_LIST and within that nest numbered attributes (array indexes) and in there put ATTR_WIPHY? So possibly I could also put ATTR_INTERFACE_LIST in there as well later? That's exactly what I would have done as well. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] add nl80211
On Fri, 2006-08-25 at 12:30 +0200, Thomas Graf wrote: That's exactly what I would have done as well. Alright. Changing it, then I'll repost. Again :) johannes - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [take13 1/3] kevent: Core files.
On Fri, 25 Aug 2006 09:48:15 +0400 Evgeniy Polyakov [EMAIL PROTECTED] wrote: kmalloc is really slow actually - it always shows somewhere on top in profiles and brings noticeable overhead It shouldn't. Please describe the workload and send the profiles. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [take13 1/3] kevent: Core files.
On Thu, Aug 24, 2006 at 11:20:24PM -0700, Andrew Morton ([EMAIL PROTECTED]) wrote: On Fri, 25 Aug 2006 09:48:15 +0400 Evgeniy Polyakov [EMAIL PROTECTED] wrote: kmalloc is really slow actually - it always shows somewhere on top in profiles and brings noticeable overhead It shouldn't. Please describe the workload and send the profiles. epoll based trivial server (accept + sendfile for the same file, about 4k), httperf with a large number of simultaneous connections. 3c59x NIC (with e1000 there were no ioreads and netif_rx). __alloc_skb calls kmem_cache_alloc() and ___kmalloc(). 16158 1.3681 ioread16 8073 0.6835 ioread32 3485 0.2951 irq_entries_start 3018 0.2555 _spin_lock 2103 0.1781 tcp_v4_rcv 1503 0.1273 sysenter_past_esp 1492 0.1263 netif_rx 1459 0.1235 skb_copy_bits 1422 0.1204 _spin_lock_irqsave 1145 0.0969 ip_route_input 983 0.0832 kmem_cache_free 964 0.0816 __alloc_skb 926 0.0784 common_interrupt 891 0.0754 __do_IRQ 846 0.0716 _read_lock 826 0.0699 __netif_rx_schedule 806 0.0682 __kmalloc 767 0.0649 do_tcp_sendpages 747 0.0632 __copy_to_user_ll 744 0.0630 pskb_expand_head -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [take13 1/3] kevent: Core files.
On Fri, 25 Aug 2006 10:32:38 +0400 Evgeniy Polyakov [EMAIL PROTECTED] wrote: On Thu, Aug 24, 2006 at 11:20:24PM -0700, Andrew Morton ([EMAIL PROTECTED]) wrote: On Fri, 25 Aug 2006 09:48:15 +0400 Evgeniy Polyakov [EMAIL PROTECTED] wrote: kmalloc is really slow actually - it always shows somewhere on top in profiles and brings noticeable overhead It shouldn't. Please describe the workload and send the profiles. epoll based trivial server (accept + sendfile for the same file, about 4k), httperf with a large number of simultaneous connections. 3c59x NIC (with e1000 there were no ioreads and netif_rx). __alloc_skb calls kmem_cache_alloc() and ___kmalloc(). 16158 1.3681 ioread16 8073 0.6835 ioread32 3485 0.2951 irq_entries_start 3018 0.2555 _spin_lock 2103 0.1781 tcp_v4_rcv 1503 0.1273 sysenter_past_esp 1492 0.1263 netif_rx 1459 0.1235 skb_copy_bits 1422 0.1204 _spin_lock_irqsave 1145 0.0969 ip_route_input 983 0.0832 kmem_cache_free 964 0.0816 __alloc_skb 926 0.0784 common_interrupt 891 0.0754 __do_IRQ 846 0.0716 _read_lock 826 0.0699 __netif_rx_schedule 806 0.0682 __kmalloc 767 0.0649 do_tcp_sendpages 747 0.0632 __copy_to_user_ll 744 0.0630 pskb_expand_head That doesn't look too bad. What's that as a percentage of total user+system time? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [take13 1/3] kevent: Core files.
From: Andrew Morton [EMAIL PROTECTED] Date: Thu, 24 Aug 2006 23:20:24 -0700 On Fri, 25 Aug 2006 09:48:15 +0400 Evgeniy Polyakov [EMAIL PROTECTED] wrote: kmalloc is really slow actually - it always shows somewhere on top in profiles and brings noticeable overhead It shouldn't. Please describe the workload and send the profiles. Not that I can account for the problem in this specific case, in my experience cutting down kmalloc() calls matters a _lot_ performance wise. For example, this is why we allocate TCP sockets as one huge blob instead of 3 separate allocations (generic socket, IP socket, TCP socket). In fact, one of the remaining performance issues in IPSEC rule creation is that we separately allocate hunks of memory for the rule's encryption state, the optional hash algorithm state, etc. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [take13 1/3] kevent: Core files.
On Fri, 25 Aug 2006 00:01:06 -0700 (PDT) David Miller [EMAIL PROTECTED] wrote: From: Andrew Morton [EMAIL PROTECTED] Date: Thu, 24 Aug 2006 23:20:24 -0700 On Fri, 25 Aug 2006 09:48:15 +0400 Evgeniy Polyakov [EMAIL PROTECTED] wrote: kmalloc is really slow actually - it always shows somewhere on top in profiles and brings noticeable overhead It shouldn't. Please describe the workload and send the profiles. Not that I can account for the problem in this specific case, in my experience cutting down kmalloc() calls matters a _lot_ performance wise. For example, this is why we allocate TCP sockets as one huge blob instead of 3 separate allocations (generic socket, IP socket, TCP socket). In fact, one of the remaining performance issues in IPSEC rule creation is that we separately allocate hunks of memory for the rule's encryption state, the optional hash algorithm state, etc. Part of that will be cache sharing between the three structs though. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [take13 1/3] kevent: Core files.
On Thu, Aug 24, 2006 at 11:58:59PM -0700, Andrew Morton ([EMAIL PROTECTED]) wrote: kmalloc is really slow actually - it always shows somewhere on top in profiles and brings noticeable overhead It shouldn't. Please describe the workload and send the profiles. epoll based trivial server (accept + sendfile for the same file, about 4k), httperf with a large number of simultaneous connections. 3c59x NIC (with e1000 there were no ioreads and netif_rx). __alloc_skb calls kmem_cache_alloc() and ___kmalloc(). 16158 1.3681 ioread16 8073 0.6835 ioread32 3485 0.2951 irq_entries_start 3018 0.2555 _spin_lock 2103 0.1781 tcp_v4_rcv 1503 0.1273 sysenter_past_esp 1492 0.1263 netif_rx 1459 0.1235 skb_copy_bits 1422 0.1204 _spin_lock_irqsave 1145 0.0969 ip_route_input 983 0.0832 kmem_cache_free 964 0.0816 __alloc_skb 926 0.0784 common_interrupt 891 0.0754 __do_IRQ 846 0.0716 _read_lock 826 0.0699 __netif_rx_schedule 806 0.0682 __kmalloc 767 0.0649 do_tcp_sendpages 747 0.0632 __copy_to_user_ll 744 0.0630 pskb_expand_head That doesn't look too bad. What's that as a percentage of total user+system time? With e1000 allocations take more time than actual TCP processing, so it raised some suspicion for me (especially in bulk transfer). Total time is about 7 times more than system one, user time is much less than system one (about 20 times less, but test duration was not too long, so it can vary). I do not say it is bad, but it is noticeable and should be eliminated if there are no requirements to have it. -- Evgeniy Polyakov - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[take14 0/3] kevent: Generic event handling mechanism.
Generic event handling mechanism. Changes from 'take13' patchset: * do not get lock around user data check in __kevent_search() * fail early if there were no registered callbacks for given type of kevent * trailing whitespace cleanup Changes from 'take12' patchset: * remove non-chardev interface for initialization * use pointer to kevent_mring instead of unsigned longs * use aligned 64bit type in raw user data (can be used by high-res timer if needed) * simplified enqueue/dequeue callbacks and kevent initialization * use nanoseconds for timeout * put number of milliseconds into timer's return data * move some definitions into user-visible header * removed filenames from comments Changes from 'take11' patchset: * include missing headers into patchset * some trivial code cleanups (use goto instead of if/else games and so on) * some whitespace cleanups * check for ready_callback() callback before main loop which should save us some ticks Changes from 'take10' patchset: * removed non-existent prototypes * added helper function for kevent_registered_callbacks * fixed 80 lines comments issues * added shared between userspace and kernelspace header instead of embedding them in one * core restructuring to remove forward declarations * s o m e w h i t e s p a c e c o d y n g s t y l e c l e a n u p * use vm_insert_page() instead of remap_pfn_range() Changes from 'take9' patchset: * fixed ->nopage method Changes from 'take8' patchset: * fixed mmap release bug * use module_init() instead of late_initcall() * use better structures for timer notifications Changes from 'take7' patchset: * new mmap interface (not tested, waiting for other changes to be acked) - use nopage() method to dynamically substitute pages - allocate new page for events only when new added kevent requires it - do not use ugly index dereferencing, use structure instead - reduced amount of data in the ring (id and flags), maximum 12 pages on x86 per kevent fd Changes from 'take6' patchset: * a lot of comments! 
* do not use list poisoning for detection of the fact, that entry is in the list * return number of ready kevents even if copy*user() fails * strict check for number of kevents in syscall * use ARRAY_SIZE for array size calculation * changed superblock magic number * use SLAB_PANIC instead of direct panic() call * changed -E* return values * a lot of small cleanups and indent fixes Changes from 'take5' patchset: * removed compilation warnings about unused variables when lockdep is not turned on * do not use internal socket structures, use appropriate (exported) wrappers instead * removed default 1 second timeout * removed AIO stuff from patchset Changes from 'take4' patchset: * use miscdevice instead of chardevice * comments fixes Changes from 'take3' patchset: * removed serializing mutex from kevent_user_wait() * moved storage list processing to RCU * removed lockdep screaming - all storage locks are initialized in the same function, so it was learned to differentiate between various cases * remove kevent from storage if is marked as broken after callback * fixed a typo in mmaped buffer implementation which would end up in wrong index calculation Changes from 'take2' patchset: * split kevent_finish_user() to locked and unlocked variants * do not use KEVENT_STAT ifdefs, use inline functions instead * use array of callbacks of each type instead of each kevent callback initialization * changed name of ukevent guarding lock * use only one kevent lock in kevent_user for all hash buckets instead of per-bucket locks * do not use kevent_user_ctl structure instead provide needed arguments as syscall parameters * various indent cleanups * added optimisation, which is aimed to help when a lot of kevents are being copied from userspace * mapped buffer (initial) implementation (no userspace yet) Changes from 'take1' patchset: - rebased against 2.6.18-git tree - removed ioctl controlling - added new syscall kevent_get_events(int fd, unsigned int min_nr, unsigned int max_nr, 
unsigned int timeout, void __user *buf, unsigned flags) - use old syscall kevent_ctl for creation/removing, modification and initial kevent initialization - use mutexes instead of semaphores - added file descriptor check and return error if provided descriptor does not match kevent file operations - various indent fixes - removed aio_sendfile() declarations. Thank you. Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[take14 2/3] kevent: poll/select() notifications.
poll/select() notifications. This patch includes generic poll/select and timer notifications. kevent_poll works similar to epoll and has the same issues (callback is invoked not from internal state machine of the caller, but through process awake). Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] diff --git a/include/linux/fs.h b/include/linux/fs.h index 2561020..76b3039 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -236,6 +236,7 @@ #include linux/prio_tree.h #include linux/init.h #include linux/sched.h #include linux/mutex.h +#include linux/kevent.h #include asm/atomic.h #include asm/semaphore.h @@ -698,6 +699,9 @@ #ifdef CONFIG_EPOLL struct list_headf_ep_links; spinlock_t f_ep_lock; #endif /* #ifdef CONFIG_EPOLL */ +#ifdef CONFIG_KEVENT_POLL + struct kevent_storage st; +#endif struct address_space*f_mapping; }; extern spinlock_t files_lock; diff --git a/kernel/kevent/kevent_poll.c b/kernel/kevent/kevent_poll.c new file mode 100644 index 000..fb74e0f --- /dev/null +++ b/kernel/kevent/kevent_poll.c @@ -0,0 +1,222 @@ +/* + * 2006 Copyright (c) Evgeniy Polyakov [EMAIL PROTECTED] + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include linux/kernel.h +#include linux/types.h +#include linux/list.h +#include linux/slab.h +#include linux/spinlock.h +#include linux/timer.h +#include linux/file.h +#include linux/kevent.h +#include linux/poll.h +#include linux/fs.h + +static kmem_cache_t *kevent_poll_container_cache; +static kmem_cache_t *kevent_poll_priv_cache; + +struct kevent_poll_ctl +{ + struct poll_table_structpt; + struct kevent *k; +}; + +struct kevent_poll_wait_container +{ + struct list_headcontainer_entry; + wait_queue_head_t *whead; + wait_queue_twait; + struct kevent *k; +}; + +struct kevent_poll_private +{ + struct list_headcontainer_list; + spinlock_t container_lock; +}; + +static int kevent_poll_enqueue(struct kevent *k); +static int kevent_poll_dequeue(struct kevent *k); +static int kevent_poll_callback(struct kevent *k); + +static int kevent_poll_wait_callback(wait_queue_t *wait, + unsigned mode, int sync, void *key) +{ + struct kevent_poll_wait_container *cont = + container_of(wait, struct kevent_poll_wait_container, wait); + struct kevent *k = cont-k; + struct file *file = k-st-origin; + u32 revents; + + revents = file-f_op-poll(file, NULL); + + kevent_storage_ready(k-st, NULL, revents); + + return 0; +} + +static void kevent_poll_qproc(struct file *file, wait_queue_head_t *whead, + struct poll_table_struct *poll_table) +{ + struct kevent *k = + container_of(poll_table, struct kevent_poll_ctl, pt)-k; + struct kevent_poll_private *priv = k-priv; + struct kevent_poll_wait_container *cont; + unsigned long flags; + + cont = kmem_cache_alloc(kevent_poll_container_cache, SLAB_KERNEL); + if (!cont) { + kevent_break(k); + return; + } + + cont-k = k; + init_waitqueue_func_entry(cont-wait, kevent_poll_wait_callback); + cont-whead = whead; + + spin_lock_irqsave(priv-container_lock, flags); + list_add_tail(cont-container_entry, priv-container_list); + spin_unlock_irqrestore(priv-container_lock, flags); + + add_wait_queue(whead, cont-wait); +} + +static int 
kevent_poll_enqueue(struct kevent *k) +{ + struct file *file; + int err, ready = 0; + unsigned int revents; + struct kevent_poll_ctl ctl; + struct kevent_poll_private *priv; + + file = fget(k-event.id.raw[0]); + if (!file) + return -ENODEV; + + err = -EINVAL; + if (!file-f_op || !file-f_op-poll) + goto err_out_fput; + + err = -ENOMEM; + priv = kmem_cache_alloc(kevent_poll_priv_cache, SLAB_KERNEL); + if (!priv) + goto err_out_fput; + + spin_lock_init(priv-container_lock); + INIT_LIST_HEAD(priv-container_list); + + k-priv = priv; + + ctl.k = k; + init_poll_funcptr(ctl.pt, kevent_poll_qproc); + + err = kevent_storage_enqueue(file-st, k); + if (err) + goto err_out_free; + + revents = file-f_op-poll(file, ctl.pt); + if
[take14 1/3] kevent: Core files.
Core files. This patch includes core kevent files: - userspace controlling - kernelspace interfaces - initialization - notification state machines Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index dd63d47..091ff42 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -317,3 +317,5 @@ ENTRY(sys_call_table) .long sys_tee /* 315 */ .long sys_vmsplice .long sys_move_pages + .long sys_kevent_get_events + .long sys_kevent_ctl diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 5d4a7d1..b2af4a8 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -713,4 +713,6 @@ #endif .quad sys_tee .quad compat_sys_vmsplice .quad compat_sys_move_pages + .quad sys_kevent_get_events + .quad sys_kevent_ctl ia32_syscall_end: diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index fc1c8dd..c9dde13 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -323,10 +323,12 @@ #define __NR_sync_file_range 314 #define __NR_tee 315 #define __NR_vmsplice 316 #define __NR_move_pages317 +#define __NR_kevent_get_events 318 +#define __NR_kevent_ctl319 #ifdef __KERNEL__ -#define NR_syscalls 318 +#define NR_syscalls 320 /* * user-visible error numbers are in the range -1 - -128: see diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index 94387c9..61363e0 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -619,10 +619,14 @@ #define __NR_vmsplice 278 __SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_move_pages279 __SYSCALL(__NR_move_pages, sys_move_pages) +#define __NR_kevent_get_events 280 +__SYSCALL(__NR_kevent_get_events, sys_kevent_get_events) +#define __NR_kevent_ctl281 +__SYSCALL(__NR_kevent_ctl, sys_kevent_ctl) #ifdef __KERNEL__ -#define __NR_syscall_max __NR_move_pages +#define __NR_syscall_max __NR_kevent_ctl #ifndef __NO_STUBS diff --git 
a/include/linux/kevent.h b/include/linux/kevent.h new file mode 100644 index 000..de33ec7 --- /dev/null +++ b/include/linux/kevent.h @@ -0,0 +1,173 @@ +/* + * 2006 Copyright (c) Evgeniy Polyakov [EMAIL PROTECTED] + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __KEVENT_H +#define __KEVENT_H +#include linux/types.h +#include linux/list.h +#include linux/spinlock.h +#include linux/mutex.h +#include linux/wait.h +#include linux/net.h +#include linux/rcupdate.h +#include linux/kevent_storage.h +#include linux/ukevent.h + +#define KEVENT_MIN_BUFFS_ALLOC 3 + +struct kevent; +struct kevent_storage; +typedef int (* kevent_callback_t)(struct kevent *); + +/* @callback is called each time new event has been caught. */ +/* @enqueue is called each time new event is queued. */ +/* @dequeue is called each time event is dequeued. */ + +struct kevent_callbacks { + kevent_callback_t callback, enqueue, dequeue; +}; + +#define KEVENT_READY 0x1 +#define KEVENT_STORAGE 0x2 +#define KEVENT_USER0x4 + +struct kevent +{ + /* Used for kevent freeing.*/ + struct rcu_head rcu_head; + struct ukevent event; + /* This lock protects ukevent manipulations, e.g. ret_flags changes. */ + spinlock_t ulock; + + /* Entry of user's queue. 
*/ + struct list_headkevent_entry; + /* Entry of origin's queue. */ + struct list_headstorage_entry; + /* Entry of user's ready. */ + struct list_headready_entry; + + u32 flags; + + /* User who requested this kevent. */ + struct kevent_user *user; + /* Kevent container. */ + struct kevent_storage *st; + + struct kevent_callbacks callbacks; + + /* Private data for different storages. +* poll()/select storage has a list of wait_queue_t containers +* for each -poll() { poll_wait()' } here. +*/ + void*priv; +}; + +#define
[take14 3/3] kevent: Timer notifications.
Timer notifications. Timer notifications can be used for fine grained per-process time management, since interval timers are very inconvenient to use, and they are limited. Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] diff --git a/kernel/kevent/kevent_timer.c b/kernel/kevent/kevent_timer.c new file mode 100644 index 000..b2fee61 --- /dev/null +++ b/kernel/kevent/kevent_timer.c @@ -0,0 +1,105 @@ +/* + * 2006 Copyright (c) Evgeniy Polyakov [EMAIL PROTECTED] + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include linux/kernel.h +#include linux/types.h +#include linux/list.h +#include linux/slab.h +#include linux/spinlock.h +#include linux/timer.h +#include linux/jiffies.h +#include linux/kevent.h + +struct kevent_timer +{ + struct timer_list ktimer; + struct kevent_storage ktimer_storage; +}; + +static void kevent_timer_func(unsigned long data) +{ + struct kevent *k = (struct kevent *)data; + struct timer_list *t = k-st-origin; + + kevent_storage_ready(k-st, NULL, KEVENT_MASK_ALL); + mod_timer(t, jiffies + msecs_to_jiffies(k-event.id.raw[0])); +} + +static struct lock_class_key kevent_timer_key; + +static int kevent_timer_enqueue(struct kevent *k) +{ + int err; + struct kevent_timer *t; + + t = kmalloc(sizeof(struct kevent_timer), GFP_KERNEL); + if (!t) + return -ENOMEM; + + setup_timer(t-ktimer, kevent_timer_func, (unsigned long)k); + + err = kevent_storage_init(t-ktimer, t-ktimer_storage); + if (err) + goto err_out_free; + lockdep_set_class(t-ktimer_storage.lock, kevent_timer_key); + + err = kevent_storage_enqueue(t-ktimer_storage, k); + if (err) + goto err_out_st_fini; + + mod_timer(t-ktimer, jiffies + msecs_to_jiffies(k-event.id.raw[0])); + + return 0; + +err_out_st_fini: + kevent_storage_fini(t-ktimer_storage); +err_out_free: + kfree(t); + + return err; +} + +static int kevent_timer_dequeue(struct kevent *k) +{ + struct kevent_storage *st = k-st; + struct kevent_timer *t = container_of(st, struct kevent_timer, ktimer_storage); + + del_timer_sync(t-ktimer); + kevent_storage_dequeue(st, k); + kfree(t); + + return 0; +} + +static int kevent_timer_callback(struct kevent *k) +{ + k-event.ret_data[0] = jiffies_to_msecs(jiffies); + return 1; +} + +static int __init kevent_init_timer(void) +{ + struct kevent_callbacks tc = { + .callback = 
kevent_timer_callback, + .enqueue = kevent_timer_enqueue, + .dequeue = kevent_timer_dequeue}; + + return kevent_add_callbacks(tc, KEVENT_TIMER); +} +module_init(kevent_init_timer); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH 2/9] deadlock prevention core
Hi! - We expect that the lots-of-dirty-anon-memory-over-swap-over-network scenario might still cause deadlocks. I assert that this can be solved by putting swap on local disks. Peter asserts that this isn't acceptable due to disk unreliability. I point out that local disk reliability can be increased via MD, all goes quiet. Putting swap on local disks really messes up the concept of stateless servers. I suppose you can do some sort of swap encryption, but otherwise you need to scrub the swap partition on boot if you re-purpose the hardware. You also then need to do hardware configuration to make sure the local disks are all setup the same way across all server platforms so the common images can boot. We should really encrypt swap with random key generated at boot, for all the machine. I believe it is possible (with some non-trivial setup) today, but it would be nice to do it automagically. Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NET_SCHED]: Add mask support to fwmark classifier
* Patrick McHardy [EMAIL PROTECTED] 2006-08-25 12:29 This patch adds support to mask the nfmark value before the lookup in the fw classifier. Unfortunately it has some drawbacks, so I'd be interested if anyone can think of a better way. The problem is that in order to avoid walking through all filters contained in one instance, we need to mask the value before the lookup. This means all filters share the same mask, which is taken from the first filter created and stored in the filter head. The user interface however always refers to a single filter, not the head, so it can't be changed afterwards unless we just overwrite it whenever a new filter is installed. Neither option is really perfect. The current patch doesn't allow changing the mask and enforces that all filters use the same one, which I think is better than allowing inconsistent configurations. The other option gets down to replacing the hash table with a list and that's not an option in my opinion. This looks very good to me. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] remove third bogus argument from NLA_PUT_FLAG
This patch removes the 'value' argument from NLA_PUT_FLAG which is unused anyway. The documentation comment was already correct so it doesn't need an update :) Signed-off-by: Johannes Berg [EMAIL PROTECTED] --- wireless-dev.orig/include/net/netlink.h 2006-08-25 12:46:30.0 +0200 +++ wireless-dev/include/net/netlink.h 2006-08-25 12:46:38.0 +0200 @@ -758,7 +758,7 @@ static inline int nla_put_msecs(struct s #define NLA_PUT_STRING(skb, attrtype, value) \ NLA_PUT(skb, attrtype, strlen(value) + 1, value) -#define NLA_PUT_FLAG(skb, attrtype, value) \ +#define NLA_PUT_FLAG(skb, attrtype) \ NLA_PUT(skb, attrtype, 0, NULL) #define NLA_PUT_MSECS(skb, attrtype, jiffies) \ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC take3] add nl80211
This patch adds nl80211, a netlink based configuration system for wireless hardware. It currently features a few helper commands and commands to add and remove virtual interfaces and to inject packets. Support for nl80211 in d80211 is in a follow-up patch. There should be support for notifications, but we need to figure out if we remove the sysfs based add/remove virtual interface thing completely or allow the driver to create a notification through some new API here. It requires the patches in http://marc.theaimsgroup.com/?l=linux-netdevm=115625436628696w=2 and http://marc.theaimsgroup.com/?l=linux-netdevm=115625168405439w=2 (the latter doesn't apply cleanly against wireless-dev, but you can safely ignore the pieces that don't, at least for wireless testing :) ) It also requires the NLA_PUT_FLAG patch I did: http://marc.theaimsgroup.com/?l=linux-netdevm=115650333420169w=2 Signed-off-by: Johannes Berg [EMAIL PROTECTED] --- /dev/null 1970-01-01 00:00:00.0 + +++ wireless-dev/include/net/nl80211.h 2006-08-25 12:51:14.0 +0200 @@ -0,0 +1,83 @@ +#ifndef __NET_NL80211_H +#define __NET_NL80211_H + +#include linux/netlink.h +#include linux/nl80211.h +#include linux/skbuff.h +#include linux/netdevice.h +#include net/genetlink.h + +/* + * 802.11 netlink in-kernel interface + * + * Copyright 2006 Johannes Berg [EMAIL PROTECTED] + */ + +/** + * struct nl80211_ops - backend description for wireless configuration + * + * This struct is registered by fullmac card drivers and/or wireless stacks + * in order to handle configuration requests on their interfaces. + * + * The priv pointer passed to each call is the pointer that was + * registered in nl80211_register_driver(). + * + * All callbacks except where otherwise noted should return 0 + * on success or a negative error code. + * + * @list_interfaces: for each interfaces belonging to the wiphy identified + * by the priv pointer, call the one() function with the + * given data and the ifindex. This callback is required. 
+ * + * @inject_packet: inject the given frame with the NL80211_FLAG_* + *flags onto the given queue. + * + * @add_virtual_intf: create a new virtual interface with the given name + * + * @del_virtual_intf: remove the virtual interface determined by ifindex. + */ +struct nl80211_ops { + int (*list_interfaces)(void *priv, void *data, + int (*one)(void *data, int ifindex)); + int (*inject_packet)(void *priv, void *frame, int framelen, +u32 flags, int queue); + + int (*add_virtual_intf)(void *priv, char *name, + unsigned int type); + int (*del_virtual_intf)(void *priv, int ifindex); + + /* more things to be added... +* +* for a (*configure)(...) call I'd probably guess that the +* best bet would be to have one call that returns all +* possible options, one that sets them based on the +* struct genl_info *info, and one for that optimised +* set-at-once thing. +*/ +}; + +/* + * register a given method structure with the nl80211 system + * and associate the 'priv' pointer with it. + * + * Returns a positive wiphy index or a negative error code. + * + * NOTE: for proper operation, this priv pointer MUST also be + * assigned to each struct net_device's @ieee80211_ptr member! + */ +extern int nl80211_register(struct nl80211_ops *ops, void *priv); +/* + * unregister a device with the given priv pointer. + * After this call, no more requests can be made with this priv + * pointer, but the call may sleep to wait for an outstanding + * request that is being handled. 
+ */ +extern void nl80211_unregister(void *priv); + +/* helper functions */ +extern void *nl80211hdr_put(struct sk_buff *skb, u32 pid, + u32 seq, int flags, u8 cmd); +extern void *nl80211msg_new(struct sk_buff **skb, u32 pid, + u32 seq, int flags, u8 cmd); + +#endif /* __NET_NL80211_H */ --- wireless-dev.orig/net/Kconfig 2006-08-25 12:51:09.0 +0200 +++ wireless-dev/net/Kconfig2006-08-25 12:51:14.0 +0200 @@ -250,6 +250,9 @@ source net/ieee80211/Kconfig config WIRELESS_EXT bool +config NETLINK_80211 + tristate + endif # if NET endmenu # Networking --- wireless-dev.orig/net/Makefile 2006-08-25 12:51:09.0 +0200 +++ wireless-dev/net/Makefile 2006-08-25 12:51:14.0 +0200 @@ -44,6 +44,7 @@ obj-$(CONFIG_ECONET) += econet/ obj-$(CONFIG_VLAN_8021Q) += 8021q/ obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ +obj-$(CONFIG_NETLINK_80211)+= wireless/ obj-$(CONFIG_D80211) += d80211/ obj-$(CONFIG_IEEE80211)+= ieee80211/ obj-$(CONFIG_TIPC) += tipc/ --- /dev/null 1970-01-01 00:00:00.0 + +++
[RFC take3] make d80211 use nl80211
This patch makes d80211 partially configurable using the infrastructure that nl80211 provides. So far, it allows packet injection and adding/removing virtual interfaces. Signed-off-by: Johannes Berg [EMAIL PROTECTED] --- wireless-dev.orig/net/d80211/Kconfig2006-08-25 11:31:01.0 +0200 +++ wireless-dev/net/d80211/Kconfig 2006-08-25 11:32:38.0 +0200 @@ -3,6 +3,7 @@ config D80211 select CRYPTO select CRYPTO_ARC4 select CRYPTO_AES + select NETLINK_80211 ---help--- This option enables the hardware independent IEEE 802.11 networking stack. --- wireless-dev.orig/net/d80211/Makefile 2006-08-25 11:31:01.0 +0200 +++ wireless-dev/net/d80211/Makefile2006-08-25 11:32:38.0 +0200 @@ -8,6 +8,7 @@ obj-$(CONFIG_D80211) += 80211.o rate_con sta_info.o \ wep.o \ wpa.o \ + ieee80211_cfg.o \ ieee80211_scan.o \ ieee80211_sta.o \ ieee80211_dev.o \ --- wireless-dev.orig/net/d80211/ieee80211.c2006-08-25 11:31:01.0 +0200 +++ wireless-dev/net/d80211/ieee80211.c 2006-08-25 11:32:38.0 +0200 @@ -20,6 +20,7 @@ #include net/iw_handler.h #include linux/compiler.h #include linux/bitmap.h +#include linux/nl80211.h #include net/d80211.h #include net/d80211_common.h @@ -32,6 +33,7 @@ #include wme.h #include aes_ccm.h #include ieee80211_led.h +#include ieee80211_cfg.h /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ @@ -354,6 +356,16 @@ ieee80211_tx_h_rate_ctrl(struct ieee8021 { struct rate_control_extra extra; + /* FIXME + if (tx-dev == tx-local-mdev + (inject rate set)) { + a + tx-u.tx.rate = ... + etc etc + return TXRX_CONTINUE; + } + */ + memset(extra, 0, sizeof(extra)); extra.mgmt_data = tx-sdata tx-sdata-type == IEEE80211_IF_TYPE_MGMT; @@ -759,6 +771,13 @@ ieee80211_tx_h_misc(struct ieee80211_txr u16 dur; struct ieee80211_tx_control *control = tx-u.tx.control; + /* FIXME + if (tx-dev == tx-local-mdev) { + set up retry limit, ... 
+ based on injection parameters + } + */ + if (!is_multicast_ether_addr(hdr-addr1)) { if (tx-skb-len + FCS_LEN tx-local-rts_threshold tx-local-rts_threshold IEEE80211_MAX_RTS_THRESHOLD) { @@ -884,6 +903,9 @@ ieee80211_tx_h_check_assoc(struct ieee80 #endif /* CONFIG_D80211_VERBOSE_DEBUG */ u32 sta_flags; + if (unlikely(tx-dev == tx-local-mdev)) + return TXRX_CONTINUE; + if (unlikely(tx-local-sta_scanning != 0) ((tx-fc IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || (tx-fc IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ)) @@ -987,6 +1009,12 @@ static void purge_old_ps_buffers(struct static inline ieee80211_txrx_result ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx) { + /* FIXME + if (unlikely(tx-dev == tx-local-mdev + (inject flags) NL80211_FLAG_NOBUFFER)) + return TXRX_CONTINUE; + */ + /* broadcast/multicast frame */ /* If any of the associated stations is in power save mode, * the frame is buffered to be sent after DTIM beacon frame */ @@ -1414,11 +1442,12 @@ static int ieee80211_master_start_xmit(s control.ifindex = odev-ifindex; control.type = osdata-type; - control.req_tx_status = pkt_data-req_tx_status; - control.do_not_encrypt = pkt_data-do_not_encrypt; + control.req_tx_status = !!(pkt_data-flags NL80211_FLAG_TXSTATUS); + control.do_not_encrypt = !(pkt_data-flags NL80211_FLAG_ENCRYPT); control.pkt_type = - pkt_data-pkt_probe_resp ? PKT_PROBE_RESP : PKT_NORMAL; - control.requeue = pkt_data-requeue; + (pkt_data-internal_flags TX_FLAG_PROBERESP) ? 
+ PKT_PROBE_RESP : PKT_NORMAL; + control.requeue = !!(pkt_data-internal_flags TX_FLAG_REQUEUE); control.queue = pkt_data-queue; ret = ieee80211_tx(odev, skb, control, @@ -1594,8 +1623,10 @@ static int ieee80211_subif_start_xmit(st pkt_data = (struct ieee80211_tx_packet_data *)skb-cb; memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); pkt_data-ifindex = sdata-dev-ifindex; - pkt_data-mgmt_iface = (sdata-type == IEEE80211_IF_TYPE_MGMT); - pkt_data-do_not_encrypt = no_encrypt; + if (sdata-type == IEEE80211_IF_TYPE_MGMT) + pkt_data-internal_flags |= TX_FLAG_INJECTED; + if (!no_encrypt) + pkt_data-flags |= NL80211_FLAG_ENCRYPT; skb-dev = sdata-master; sdata-stats.tx_packets++; @@ -1646,11 +1677,12 @@ ieee80211_mgmt_start_xmit(struct sk_buff pkt_data = (struct
Re: [NET_SCHED]: Add mask support to fwmark classifier
Thomas Graf wrote: * Patrick McHardy [EMAIL PROTECTED] 2006-08-25 12:29 The problem is that in order to avoid walking through all filters contained in one instance, we need to mask the value before the lookup. This means all filters share the same mask, which is taken from the first filter created and stored in the filter head. The user interface however always refers to a single filter, not the head, so it can't be changed afterwards unless we just overwrite it whenever a new filter is installed. Both is not really perfect. The current patch doesn't allow to change the mark and enforces that all filters use the same one, which I think is better than allowing inconsistent configurations. The other option gets down to replacing the hash table with a list and that's not an option in my opinion. This looks very good to me. Great, thanks. I'll send it off to Dave with two similar patches for IPv4 and DecNET routing rules. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[DECNET]: Add support for fwmark masks in routing rules
[DECNET]: Add support for fwmark masks in routing rules Add support for fwmark masks. For compatibility a mask of 0x is used when a mark value != 0 is sent without a mask. Signed-off-by: Patrick McHardy [EMAIL PROTECTED] --- commit bcd4f6996453aaf0a8d5515dcc533115621c961f tree 62909d3d2c6edd4f236284b86c4c422cb40bc489 parent 9037bbabed75d822002be78047f518d42f225a00 author Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 14:00:12 +0200 committer Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 14:00:12 +0200 net/decnet/dn_rules.c | 20 ++-- 1 files changed, 18 insertions(+), 2 deletions(-) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 50e819e..63ad63d 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -47,6 +47,7 @@ struct dn_fib_rule u8 flags; #ifdef CONFIG_DECNET_ROUTE_FWMARK u32 fwmark; + u32 fwmask; #endif }; @@ -116,6 +117,7 @@ static struct nla_policy dn_fib_rule_pol [FRA_SRC] = { .type = NLA_U16 }, [FRA_DST] = { .type = NLA_U16 }, [FRA_FWMARK]= { .type = NLA_U32 }, + [FRA_FWMASK]= { .type = NLA_U32 }, [FRA_TABLE] = { .type = NLA_U32 }, }; @@ -130,7 +132,7 @@ static int dn_fib_rule_match(struct fib_ return 0; #ifdef CONFIG_DECNET_ROUTE_FWMARK - if (r-fwmark (r-fwmark != fl-fld_fwmark)) + if ((r-fwmark ^ fl-fld_fwmark) r-fwmask) return 0; #endif @@ -168,8 +170,17 @@ static int dn_fib_rule_configure(struct r-dst = nla_get_u16(tb[FRA_DST]); #ifdef CONFIG_DECNET_ROUTE_FWMARK - if (tb[FRA_FWMARK]) + if (tb[FRA_FWMARK]) { r-fwmark = nla_get_u32(tb[FRA_FWMARK]); + if (r-fwmark) + /* compatibility: if the mark value is non-zero all bits +* are compared unless a mask is explicitly specified. 
+*/ + r-fwmask = 0xFFFFFFFF; + } + + if (tb[FRA_FWMASK]) + r-fwmask = nla_get_u32(tb[FRA_FWMASK]); #endif r-src_len = frh-src_len; @@ -195,6 +206,9 @@ static int dn_fib_rule_compare(struct fi #ifdef CONFIG_DECNET_ROUTE_FWMARK if (tb[FRA_FWMARK] (r-fwmark != nla_get_u32(tb[FRA_FWMARK]))) return 0; + + if (tb[FRA_FWMASK] (r-fwmask != nla_get_u32(tb[FRA_FWMASK]))) + return 0; #endif if (tb[FRA_SRC] (r-src != nla_get_u16(tb[FRA_SRC]))) @@ -237,6 +251,8 @@ static int dn_fib_rule_fill(struct fib_r #ifdef CONFIG_DECNET_ROUTE_FWMARK if (r-fwmark) NLA_PUT_U32(skb, FRA_FWMARK, r-fwmark); + if (r-fwmask || r-fwmark) + NLA_PUT_U32(skb, FRA_FWMASK, r-fwmask); #endif if (r-dst_len) NLA_PUT_U16(skb, FRA_DST, r-dst);
[NET_SCHED]: Add mask support to fwmark classifier
[NET_SCHED]: Add mask support to fwmark classifier Support masking the nfmark value before the search. The mask value is global for all filters contained in one instance. It can only be set when a new instance is created, all filters must specify the same mask. Signed-off-by: Patrick McHardy [EMAIL PROTECTED] --- commit 734b411074d5cdb6cf1d85c7460f63730fe958f6 tree 4324105ebc0a46250cc564ecbfa3f11b8dba4369 parent bcd4f6996453aaf0a8d5515dcc533115621c961f author Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 14:01:20 +0200 committer Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 14:01:20 +0200 include/linux/pkt_cls.h |1 + net/sched/cls_fw.c | 25 - 2 files changed, 25 insertions(+), 1 deletions(-) diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index bd2c5a2..c3f01b3 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -305,6 +305,7 @@ enum TCA_FW_POLICE, TCA_FW_INDEV, /* used by CONFIG_NET_CLS_IND */ TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */ + TCA_FW_MASK, __TCA_FW_MAX }; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index e6973d9..e54acc6 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -50,6 +50,7 @@ #define HTSIZE (PAGE_SIZE/sizeof(struct struct fw_head { struct fw_filter *ht[HTSIZE]; + u32 mask; }; struct fw_filter @@ -101,7 +102,7 @@ static int fw_classify(struct sk_buff *s struct fw_filter *f; int r; #ifdef CONFIG_NETFILTER - u32 id = skb-nfmark; + u32 id = skb-nfmark head-mask; #else u32 id = 0; #endif @@ -209,7 +210,9 @@ static int fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, struct rtattr **tb, struct rtattr **tca, unsigned long base) { + struct fw_head *head = (struct fw_head *)tp-root; struct tcf_exts e; + u32 mask; int err; err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], e, fw_ext_map); @@ -232,6 +235,15 @@ #ifdef CONFIG_NET_CLS_IND } #endif /* CONFIG_NET_CLS_IND */ + if (tb[TCA_FW_MASK-1]) { + if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) + goto errout; + mask = 
*(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); + if (mask != head-mask) + goto errout; + } else if (head-mask != 0x) + goto errout; + tcf_exts_change(tp, f-exts, e); return 0; @@ -267,9 +279,17 @@ static int fw_change(struct tcf_proto *t return -EINVAL; if (head == NULL) { + u32 mask = 0x; + if (tb[TCA_FW_MASK-1]) { + if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) + return -EINVAL; + mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); + } + head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; + head-mask = mask; tcf_tree_lock(tp); tp-root = head; @@ -330,6 +350,7 @@ static void fw_walk(struct tcf_proto *tp static int fw_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { + struct fw_head *head = (struct fw_head *)tp-root; struct fw_filter *f = (struct fw_filter*)fh; unsigned char*b = skb-tail; struct rtattr *rta; @@ -351,6 +372,8 @@ #ifdef CONFIG_NET_CLS_IND if (strlen(f-indev)) RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f-indev); #endif /* CONFIG_NET_CLS_IND */ + if (head-mask != 0x) + RTA_PUT(skb, TCA_FW_MASK, 4, head-mask); if (tcf_exts_dump(skb, f-exts, fw_ext_map) 0) goto rtattr_failure;
[IPV4]: Add support for fwmark masks in routing rules
Hi Dave, these three patches add support for masking the nfmark value in a few spots where it would be useful in an attempt to make life easier for users using it for multiple unrelated things. [IPV4]: Add support for fwmark masks in routing rules Add a FRA_FWMASK attributes for fwmark masks. For compatibility a mask of 0x is used when a mark value != 0 is sent without a mask. Signed-off-by: Patrick McHardy [EMAIL PROTECTED] --- commit 9037bbabed75d822002be78047f518d42f225a00 tree 2ccc07b7c4d7f20b2b8722ed935908595c197803 parent e6d442e62c126e11b3199ca1bddeb7534a7cb15e author Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 13:59:10 +0200 committer Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 13:59:10 +0200 include/linux/fib_rules.h |3 ++- net/ipv4/fib_rules.c | 21 +++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 19a82b6..4418c8d 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -34,12 +34,13 @@ enum FRA_UNUSED3, FRA_UNUSED4, FRA_UNUSED5, - FRA_FWMARK, /* netfilter mark (IPv4) */ + FRA_FWMARK, /* netfilter mark */ FRA_FLOW, /* flow/class id */ FRA_UNUSED6, FRA_UNUSED7, FRA_UNUSED8, FRA_TABLE, /* Extended table id */ + FRA_FWMASK, /* mask for netfilter mark */ __FRA_MAX }; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ce185ac..280f424 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -46,6 +46,7 @@ struct fib4_rule u32 dstmask; #ifdef CONFIG_IP_ROUTE_FWMARK u32 fwmark; + u32 fwmask; #endif #ifdef CONFIG_NET_CLS_ROUTE u32 tclassid; @@ -160,7 +161,7 @@ static int fib4_rule_match(struct fib_ru return 0; #ifdef CONFIG_IP_ROUTE_FWMARK - if (r-fwmark (r-fwmark != fl-fl4_fwmark)) + if ((r-fwmark ^ fl-fl4_fwmark) r-fwmask) return 0; #endif @@ -183,6 +184,7 @@ static struct nla_policy fib4_rule_polic [FRA_SRC] = { .type = NLA_U32 }, [FRA_DST] = { .type = NLA_U32 }, [FRA_FWMARK]= { .type = NLA_U32 }, + [FRA_FWMASK]= { .type = NLA_U32 }, 
[FRA_FLOW] = { .type = NLA_U32 }, [FRA_TABLE] = { .type = NLA_U32 }, }; @@ -219,8 +221,17 @@ static int fib4_rule_configure(struct fi rule4-dst = nla_get_u32(tb[FRA_DST]); #ifdef CONFIG_IP_ROUTE_FWMARK - if (tb[FRA_FWMARK]) + if (tb[FRA_FWMARK]) { rule4-fwmark = nla_get_u32(tb[FRA_FWMARK]); + if (rule4-fwmark) + /* compatibility: if the mark value is non-zero all bits +* are compared unless a mask is explicitly specified. +*/ + rule4-fwmask = 0x; + } + + if (tb[FRA_FWMASK]) + rule4-fwmask = nla_get_u32(tb[FRA_FWMASK]); #endif #ifdef CONFIG_NET_CLS_ROUTE @@ -256,6 +267,9 @@ static int fib4_rule_compare(struct fib_ #ifdef CONFIG_IP_ROUTE_FWMARK if (tb[FRA_FWMARK] (rule4-fwmark != nla_get_u32(tb[FRA_FWMARK]))) return 0; + + if (tb[FRA_FWMASK] (rule4-fwmask != nla_get_u32(tb[FRA_FWMASK]))) + return 0; #endif #ifdef CONFIG_NET_CLS_ROUTE @@ -285,6 +299,9 @@ static int fib4_rule_fill(struct fib_rul #ifdef CONFIG_IP_ROUTE_FWMARK if (rule4-fwmark) NLA_PUT_U32(skb, FRA_FWMARK, rule4-fwmark); + + if (rule4-fwmask || rule4-fwmark) + NLA_PUT_U32(skb, FRA_FWMASK, rule4-fwmask); #endif if (rule4-dst_len)
Re: [DECNET]: Add support for fwmark masks in routing rules
Hi, On Fri, Aug 25, 2006 at 02:14:12PM +0200, Patrick McHardy wrote: [DECNET]: Add support for fwmark masks in routing rules Add support for fwmark masks. For compatibility a mask of 0x is used when a mark value != 0 is sent without a mask. Signed-off-by: Patrick McHardy [EMAIL PROTECTED] Acked-by: Steven Whitehouse [EMAIL PROTECTED] Looks good, Steve. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NET_SCHED]: Add mask support to fwmark classifier
On Fri, 2006-25-08 at 14:02 +0200, Patrick McHardy wrote: Thomas Graf wrote: * Patrick McHardy [EMAIL PROTECTED] 2006-08-25 12:29 The problem is that in order to avoid walking through all filters contained in one instance, we need to mask the value before the lookup. This means all filters share the same mask, which is taken from the first filter created and stored in the filter head. The user interface however always refers to a single filter, not the head, so it can't be changed afterwards unless we just overwrite it whenever a new filter is installed. Both is not really perfect. The current patch doesn't allow to change the mark and enforces that all filters use the same one, which I think is better than allowing inconsistent configurations. The other option gets down to replacing the hash table with a list and that's not an option in my opinion. This looks very good to me. Great, thanks. I'll send it off to Dave with two similar patches for IPv4 and DecNET routing rules. ACKed by me as well. cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
IBM eHEA Device Driver upstream inclusion
Hi Jeff, the IBM eHEA Device Driver has been discussed on the netdev, linux-ppc and kernel mailing list for some time. The latest patch set we posted can be found at: http://www.spinics.net/lists/netdev/msg12820.html As the discussion seems to have settled, please consider our driver for upstream inclusion. Thanks, Jan-Bernd Themann Christoph Raisch - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2.6.17 0/9] NetXen: 1G/10G Ethernet Driver - patch for big-endian systems
Wendy, Michael, Ueimor, Thanks for the patch and feedback. We'll integrate these into our driver and post an update asap. -Amit On Friday 25 August 2006 03:10, Francois Romieu wrote: wen xiong [EMAIL PROTECTED] : [...] diff -Nuar old/drivers/net/netxen/netxen_nic_hw.c new/drivers/net/netxen/netxen_nic_hw.c --- old/drivers/net/netxen/netxen_nic_hw.c 2006-08-23 12:58:43.0 -0500 +++ new/drivers/net/netxen/netxen_nic_hw.c2006-08-23 13:15:19.0 -0500 @@ -313,7 +313,8 @@ } } CMD_DESC_TCP_HDR_OFFSET_WRT(desc, skb-h.raw - skb-data); - desc-ip_hdr_offset = skb-nh.raw - skb-data; + desc-length_tcp_hdr=cpu_to_le32(desc-length_tcp_hdr); s/=/ = / (several occurences) [...] diff -Nuar old/drivers/net/netxen/netxen_nic_init.c new/drivers/net/netxen/netxen_nic_init.c --- old/drivers/net/netxen/netxen_nic_init.c2006-08-23 12:58:43.0 -0500 +++ new/drivers/net/netxen/netxen_nic_init.c 2006-08-23 13:15:19.0 -0500 @@ -494,7 +494,7 @@ desc_head = recv_ctx-rcv_status_desc_head; desc = desc_head[consumer]; - if ((desc-owner STATUS_OWNER_HOST)) + if (((le16_to_cpu(desc-owner)) STATUS_OWNER_HOST)) Would it make a difference to swab the constant part, i.e.: if (desc-owner cpu_to_le16(STATUS_OWNER_HOST)) - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2.6.17 0/9] NetXen: 1G/10G Ethernet Driver
Hi Don, Thanks. We'll lindent the sources and post an update asap. -Amit On Thursday 24 August 2006 05:34, Don Fry wrote: It looks like you have not run the source through Lindent as previously requested. Before you submit the code again, please use the Lindent script. I can get the code to ping between two cards. Will be doing more testing tomorrow. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 0/3] secid reconciliation-v01: Repost patchset with up dates
I like these changes, but wondering why you haven't supplied code for the outbound case ? - James The code for the outbound is still in the works. I hope to have it out in a week or so. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 0/3] secid reconciliation-v01: Repost patchset with up dates
On Fri, 25 Aug 2006, Venkat Yekkirala wrote: I like these changes, but wondering why you haven't supplied code for the outbound case ? - James The code for the outbound is still in the works. I hope to have it out in a week or so. Ok, I guess we should wait until then before incorporating the patches (also, for Paul Moore to return and comment re. CIPSO). - James -- James Morris [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 44/44] [XFRM] IPV6: Support Mobile IPv6 extension headers sorting.
On Fri, 25 Aug 2006 03:16:51 -0700 (PDT) David Miller [EMAIL PROTECTED] wrote: From: Masahide NAKAMURA [EMAIL PROTECTED] Date: Fri, 25 Aug 2006 19:06:40 +0900 I've found a problem about MIPv6 CN with the patch below. We just need to search by priority in the inexact list, even if we get a hit in the hash table. The fix is trivial, please try this patch: Thank you for providing it quickly. It works! I continue my test with this patch for now to confirm other features. Regards, -- Masahide NAKAMURA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NET_SCHED]: Add mask support to fwmark classifier
On Fri, 2006-25-08 at 14:02 +0200, Patrick McHardy wrote: Thomas Graf wrote: * Patrick McHardy [EMAIL PROTECTED] 2006-08-25 12:29 The problem is that in order to avoid walking through all filters contained in one instance, we need to mask the value before the lookup. This means all filters share the same mask, which is taken from the first filter created and stored in the filter head. The user interface however always refers to a single filter, not the head, so it can't be changed afterwards unless we just overwrite it whenever a new filter is installed. Both is not really perfect. The current patch doesn't allow to change the mark and enforces that all filters use the same one, which I think is better than allowing inconsistent configurations. The other option gets down to replacing the hash table with a list and that's not an option in my opinion. This looks very good to me. This doesn't obsolete my previous ack, but: Another approach could have been to add the mask as part of the hashing, and you add the new hash field not in the head but rather in the filter. At runtime, you hash - walk the bucket and compare the mask as well as the index. The above could be a future improvement. cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3] [IPV6] ROUTE: Add support for fwmask in routing rules.
[IPV6] ROUTE: Add support for fwmask in routing rules. Add support for fwmark masks. A mask of 0xFFFFFFFF is used when a mark value != 0 is sent without a mask. Based on patch for net/ipv4/fib_rules.c by Patrick McHardy [EMAIL PROTECTED]. Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED] --- net/ipv6/fib6_rules.c | 24 ++-- 1 files changed, 22 insertions(+), 2 deletions(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 3d64c71..ee4aa43 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -28,6 +28,7 @@ struct fib6_rule struct rt6key dst; #ifdef CONFIG_IPV6_ROUTE_FWMARK u32 fwmark; + u32 fwmask; #endif u8 tclass; }; @@ -128,7 +129,7 @@ static int fib6_rule_match(struct fib_ru return 0; #ifdef CONFIG_IPV6_ROUTE_FWMARK - if (r-fwmark (r-fwmark != fl-fl6_fwmark)) + if ((r-fwmark ^ fl-fl6_fwmark) & r-fwmask) return 0; #endif @@ -141,6 +142,7 @@ static struct nla_policy fib6_rule_polic [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, [FRA_DST] = { .minlen = sizeof(struct in6_addr) }, [FRA_FWMARK]= { .type = NLA_U32 }, + [FRA_FWMASK]= { .type = NLA_U32 }, [FRA_TABLE] = { .type = NLA_U32 }, }; @@ -174,8 +176,20 @@ static int fib6_rule_configure(struct fi sizeof(struct in6_addr)); #ifdef CONFIG_IPV6_ROUTE_FWMARK - if (tb[FRA_FWMARK]) + if (tb[FRA_FWMARK]) { rule6-fwmark = nla_get_u32(tb[FRA_FWMARK]); + if (rule6-fwmark) { + /* +* if the mark value is non-zero, +* all bits are compared by default +* unless a mask is explicitly specified. 
+*/ + rule6-fwmask = 0xFFFFFFFF; + } + } + + if (tb[FRA_FWMASK]) + rule6-fwmask = nla_get_u32(tb[FRA_FWMASK]); #endif rule6-src.plen = frh-src_len; @@ -212,6 +226,9 @@ static int fib6_rule_compare(struct fib_ #ifdef CONFIG_IPV6_ROUTE_FWMARK if (tb[FRA_FWMARK] (rule6-fwmark != nla_get_u32(tb[FRA_FWMARK]))) return 0; + + if (tb[FRA_FWMASK] (rule6-fwmask != nla_get_u32(tb[FRA_FWMASK]))) + return 0; #endif return 1; @@ -238,6 +255,9 @@ static int fib6_rule_fill(struct fib_rul #ifdef CONFIG_IPV6_ROUTE_FWMARK if (rule6-fwmark) NLA_PUT_U32(skb, FRA_FWMARK, rule6-fwmark); + + if (rule6-fwmask) + NLA_PUT_U32(skb, FRA_FWMASK, rule6-fwmask); #endif return 0; -- YOSHIFUJI Hideaki @ USAGI Project [EMAIL PROTECTED] GPG-FP : 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/3] [IPV6] ROUTE: Fix size of fib6_rule_policy.
[IPV6] ROUTE: Fix size of fib6_rule_policy. It should not be RTA_MAX+1 but FRA_MAX+1. Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED] --- net/ipv6/fib6_rules.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index b4cd5c0..3d64c71 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -135,7 +135,7 @@ #endif return 1; } -static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = { +static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = { [FRA_IFNAME]= { .type = NLA_STRING }, [FRA_PRIORITY] = { .type = NLA_U32 }, [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, -- YOSHIFUJI Hideaki @ USAGI Project [EMAIL PROTECTED] GPG-FP : 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/3] [IPV6] Policy Routing Updates
Hello. Here are some IPv6 policy routing fixes on top of net-2.6.19 tree. [PATCH 1/3] [IPV6] ROUTE: Fix FWMARK support. [PATCH 2/3] [IPV6] ROUTE: Fix size of fib6_rule_policy. If we accept Patrick's IPv4 fwmask patch, here's the one for IPv6. [PATCH 3/3] [IPV6] ROUTE: Add support for fwmask in routing rules. -- YOSHIFUJI Hideaki @ USAGI Project [EMAIL PROTECTED] GPG-FP : 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/3] [IPV6] ROUTE: Fix FWMARK support.
[IPV6] ROUTE: Fix FWMARK support. - Add missing nla_policy entry. - type of fwmark is u32, not u8. Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED] --- net/ipv6/fib6_rules.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index aebd9e2..b4cd5c0 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -27,7 +27,7 @@ struct fib6_rule struct rt6key src; struct rt6key dst; #ifdef CONFIG_IPV6_ROUTE_FWMARK - u8 fwmark; + u32 fwmark; #endif u8 tclass; }; @@ -140,6 +140,7 @@ static struct nla_policy fib6_rule_polic [FRA_PRIORITY] = { .type = NLA_U32 }, [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, [FRA_DST] = { .minlen = sizeof(struct in6_addr) }, + [FRA_FWMARK]= { .type = NLA_U32 }, [FRA_TABLE] = { .type = NLA_U32 }, }; -- YOSHIFUJI Hideaki @ USAGI Project [EMAIL PROTECTED] GPG-FP : 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/4] nbd: deadlock prevention for NBD
Use sk_set_vmio() on the nbd socket. Limit each request to 1 page, so that the request throttling also limits the number of in-flight pages and force the IO scheduler to NOOP as anything else doesn't make sense anyway. Signed-off-by: Peter Zijlstra [EMAIL PROTECTED] Signed-off-by: Daniel Phillips [EMAIL PROTECTED] --- drivers/block/nbd.c | 18 -- 1 file changed, 16 insertions(+), 2 deletions(-) Index: linux-2.6/drivers/block/nbd.c === --- linux-2.6.orig/drivers/block/nbd.c +++ linux-2.6/drivers/block/nbd.c @@ -135,7 +135,6 @@ static int sock_xmit(struct socket *sock spin_unlock_irqrestore(current-sighand-siglock, flags); do { - sock-sk-sk_allocation = GFP_NOIO; iov.iov_base = buf; iov.iov_len = size; msg.msg_name = NULL; @@ -361,8 +360,16 @@ static void nbd_do_it(struct nbd_device BUG_ON(lo-magic != LO_MAGIC); + sk_adjust_memalloc(0, 1); + if (sk_set_vmio(lo-sock-sk)) + printk(KERN_WARNING + failed to set SOCK_VMIO on NBD socket\n); + while ((req = nbd_read_stat(lo)) != NULL) nbd_end_request(req); + + sk_adjust_memalloc(0, -1); + return; } @@ -525,6 +533,7 @@ static int nbd_ioctl(struct inode *inode if (S_ISSOCK(inode-i_mode)) { lo-file = file; lo-sock = SOCKET_I(inode); + lo-sock-sk-sk_allocation = GFP_NOIO; error = 0; } else { fput(file); @@ -628,11 +637,16 @@ static int __init nbd_init(void) * every gendisk to have its very own request_queue struct. * These structs are big so we dynamically allocate them. 
*/ - disk-queue = blk_init_queue(do_nbd_request, nbd_lock); + disk-queue = blk_init_queue_node_elv(do_nbd_request, + nbd_lock, -1, noop); if (!disk-queue) { put_disk(disk); goto out; } + blk_queue_pin_elevator(disk-queue); + blk_queue_max_segment_size(disk-queue, PAGE_SIZE); + blk_queue_max_hw_segments(disk-queue, 1); + blk_queue_max_phys_segments(disk-queue, 1); } if (register_blkdev(NBD_MAJOR, nbd)) { - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/4] blkdev: iosched selection for queue creation
Provide an block queue init function that allows to set an elevator. And a function to pin the current elevator. Signed-off-by: Peter Zijlstra [EMAIL PROTECTED] Signed-off-by: Daniel Phillips [EMAIL PROTECTED] --- block/elevator.c |5 + block/ll_rw_blk.c | 12 ++-- include/linux/blkdev.h |9 + 3 files changed, 24 insertions(+), 2 deletions(-) Index: linux-2.6/block/ll_rw_blk.c === --- linux-2.6.orig/block/ll_rw_blk.c +++ linux-2.6/block/ll_rw_blk.c @@ -1899,6 +1899,14 @@ EXPORT_SYMBOL(blk_init_queue); request_queue_t * blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) { + return blk_init_queue_node_elv(rfn, lock, node_id, NULL); +} +EXPORT_SYMBOL(blk_init_queue_node); + +request_queue_t * +blk_init_queue_node_elv(request_fn_proc *rfn, spinlock_t *lock, int node_id, + char *elv_name) +{ request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id); if (!q) @@ -1939,7 +1947,7 @@ blk_init_queue_node(request_fn_proc *rfn /* * all done */ - if (!elevator_init(q, NULL)) { + if (!elevator_init(q, elv_name)) { blk_queue_congestion_threshold(q); return q; } @@ -1947,7 +1955,7 @@ blk_init_queue_node(request_fn_proc *rfn blk_put_queue(q); return NULL; } -EXPORT_SYMBOL(blk_init_queue_node); +EXPORT_SYMBOL(blk_init_queue_node_elv); int blk_get_queue(request_queue_t *q) { Index: linux-2.6/include/linux/blkdev.h === --- linux-2.6.orig/include/linux/blkdev.h +++ linux-2.6/include/linux/blkdev.h @@ -444,6 +444,12 @@ struct request_queue #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_ELVPINNED 9 /* pin the current elevator */ + +static inline void blk_queue_pin_elevator(struct request_queue *q) +{ + set_bit(QUEUE_FLAG_ELVPINNED, q-queue_flags); +} enum { /* @@ -696,6 +702,9 @@ static inline void elv_dispatch_add_tail /* * Access functions for manipulating queue properties */ +extern request_queue_t 
*blk_init_queue_node_elv(request_fn_proc *rfn, + spinlock_t *lock, int node_id, + char *elv_name); extern request_queue_t *blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id); extern request_queue_t *blk_init_queue(request_fn_proc *, spinlock_t *); Index: linux-2.6/block/elevator.c === --- linux-2.6.orig/block/elevator.c +++ linux-2.6/block/elevator.c @@ -861,6 +861,11 @@ ssize_t elv_iosched_store(request_queue_ size_t len; struct elevator_type *e; + if (test_bit(QUEUE_FLAG_ELVPINNED, q-queue_flags)) { + printk(KERN_NOTICE elevator: cannot switch elevator, pinned\n); + return count; + } + elevator_name[sizeof(elevator_name) - 1] = '\0'; strncpy(elevator_name, name, sizeof(elevator_name) - 1); len = strlen(elevator_name); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/4] nfs: deadlock prevention for NFS
Provide a proper a_ops-swapfile() implementation for NFS. This will set the NFS socket to SOCK_VMIO and put the socket reconnection under PF_MEMALLOC (I hope this is enough, otherwise more work needs to be done). Signed-off-by: Peter Zijlstra [EMAIL PROTECTED] --- fs/nfs/file.c | 21 - include/linux/sunrpc/xprt.h |4 +++- net/sunrpc/xprtsock.c | 16 3 files changed, 39 insertions(+), 2 deletions(-) Index: linux-2.6/fs/nfs/file.c === --- linux-2.6.orig/fs/nfs/file.c +++ linux-2.6/fs/nfs/file.c @@ -27,6 +27,7 @@ #include linux/slab.h #include linux/pagemap.h #include linux/smp_lock.h +#include net/sock.h #include asm/uaccess.h #include asm/system.h @@ -317,7 +318,25 @@ static int nfs_release_page(struct page static int nfs_swapfile(struct address_space *mapping, int enable) { - return 0; + int err = -EINVAL; + struct rpc_clnt *client = NFS_CLIENT(mapping-host); + struct sock *sk = client-cl_xprt-inet; + + if (enable) { + client-cl_xprt-swapper = 1; + /* +* keep one extra sock reference so the reserve won't dip +* when the socket gets reconnected. 
+*/ + sk_adjust_memalloc(1, 1); + err = sk_set_vmio(sk); + } else if (client-cl_xprt-swapper) { + client-cl_xprt-swapper = 0; + sk_adjust_memalloc(-1, -1); + err = sk_clear_vmio(sk); + } + + return err; } const struct address_space_operations nfs_file_aops = { Index: linux-2.6/net/sunrpc/xprtsock.c === --- linux-2.6.orig/net/sunrpc/xprtsock.c +++ linux-2.6/net/sunrpc/xprtsock.c @@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void * { struct rpc_xprt *xprt = (struct rpc_xprt *) args; struct socket *sock = xprt-sock; + unsigned long pflags = current-flags; int err, status = -EIO; if (xprt-shutdown || xprt-addr.sin_port == 0) @@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void * dprintk(RPC: xs_udp_connect_worker for xprt %p\n, xprt); + if (xprt-swapper) + current-flags |= PF_MEMALLOC; + /* Start by resetting any existing state */ xs_close(xprt); @@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void * xprt-sock = sock; xprt-inet = sk; + if (xprt-swapper) + sk_set_vmio(sk); + write_unlock_bh(sk-sk_callback_lock); } xs_udp_do_set_buffer_size(xprt); @@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void * out: xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + current-flags = pflags; } /* @@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void * { struct rpc_xprt *xprt = (struct rpc_xprt *)args; struct socket *sock = xprt-sock; + unsigned long pflags = current-flags; int err, status = -EIO; if (xprt-shutdown || xprt-addr.sin_port == 0) goto out; + if (xprt-swapper) + current-flags |= PF_MEMALLOC; + dprintk(RPC: xs_tcp_connect_worker for xprt %p\n, xprt); if (!xprt-sock) { @@ -1170,10 +1182,14 @@ static void xs_tcp_connect_worker(void * break; } } + + if (xprt-swapper) + sk_set_vmio(xprt-inet); out: xprt_wake_pending_tasks(xprt, status); out_clear: xprt_clear_connecting(xprt); + current-flags = pflags; } /** Index: linux-2.6/include/linux/sunrpc/xprt.h === --- linux-2.6.orig/include/linux/sunrpc/xprt.h +++ 
linux-2.6/include/linux/sunrpc/xprt.h @@ -147,7 +147,9 @@ struct rpc_xprt { unsigned intmax_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ - resvport : 1; /* use a reserved port */ + resvport : 1, /* use a reserved port */ + swapper: 1; /* we're swapping over this + transport */ /* * XID - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/4] VM deadlock prevention -v5
Hi, Here is the latest version of the VM deadlock prevention work. The basic premise is that network sockets serving the VM need undisturbed functionality in the face of severe memory shortage. This patch-set provides the framework to provide this. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/4] net: VM deadlock avoidance framework
The core of the VM deadlock avoidance framework. In order to provide robust networked block devices there must be a guarantee of progress. That is, the block device must never stall because of OOM because the device itself might be needed to get out of OOM (reclaim pageout). This means that the device queue must always be unpluggable, this in turn means that it must always find enough memory to build/send packets over the network _and_ receive ACKs for those packets. The network stack has a huge capacity for buffering packets; waiting for user-space to read them. There is a practical limit imposed to avoid DoS scenarios. These two things make for a deadlock; what if the receive limit is reached and all packets are buffered in non-critical sockets (those not serving the network block device waiting for an ACK to free a page). Memory pressure will add to that; what if there is simply no memory left to receive packets in. This patch provides a service to register sockets as critical; SOCK_VMIO is a promise the socket will never block on receive. Along with a memory reserve that will service a limited number of packets this can guarantee full service to these critical sockets. When we make sure that packets allocated from the reserve will only service critical sockets we will not lose the memory and can guarantee progress. Since memory is tight and the reserve modest, we do not want to lose memory to fragmentation effects. Hence a very simple allocator is used to guarantee that the memory used for each packet is returned to the page allocator. 
Converted protocols: IPv4 IPv6: - icmp - udp - tcp IPv4: - igmp Signed-off-by: Peter Zijlstra [EMAIL PROTECTED] Signed-off-by: Daniel Phillips [EMAIL PROTECTED] --- include/linux/gfp.h|3 - include/linux/mmzone.h |1 include/linux/skbuff.h | 13 -- include/net/sock.h | 37 + mm/page_alloc.c| 41 ++- net/core/skbuff.c | 103 ++--- net/core/sock.c| 97 ++ net/ipv4/af_inet.c |3 + net/ipv4/icmp.c|3 + net/ipv4/igmp.c|3 + net/ipv4/tcp_ipv4.c|3 + net/ipv4/udp.c |8 +++ net/ipv6/af_inet6.c|3 + net/ipv6/icmp.c|3 + net/ipv6/tcp_ipv6.c|3 + net/ipv6/udp.c |3 + 16 files changed, 305 insertions(+), 22 deletions(-) Index: linux-2.6/include/linux/gfp.h === --- linux-2.6.orig/include/linux/gfp.h +++ linux-2.6/include/linux/gfp.h @@ -46,6 +46,7 @@ struct vm_area_struct; #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ #define __GFP_NOMEMALLOC ((__force gfp_t)0x1u) /* Don't use emergency reserves */ #define __GFP_HARDWALL ((__force gfp_t)0x2u) /* Enforce hardwall cpuset memory allocs */ +#define __GFP_EMERG ((__force gfp_t)0x4u) /* Use emergency reserves */ #define __GFP_BITS_SHIFT 20/* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 __GFP_BITS_SHIFT) - 1)) @@ -54,7 +55,7 @@ struct vm_area_struct; #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ - __GFP_NOMEMALLOC|__GFP_HARDWALL) + __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_EMERG) /* This equals 0, but use constants in case they ever change */ #define GFP_NOWAIT (GFP_ATOMIC ~__GFP_HIGH) Index: linux-2.6/include/linux/mmzone.h === --- linux-2.6.orig/include/linux/mmzone.h +++ linux-2.6/include/linux/mmzone.h @@ -420,6 +420,7 @@ int percpu_pagelist_fraction_sysctl_hand void __user *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int adjust_memalloc_reserve(int bytes); #include 
linux/topology.h /* Returns the number of the current Node. */ Index: linux-2.6/include/linux/skbuff.h === --- linux-2.6.orig/include/linux/skbuff.h +++ linux-2.6/include/linux/skbuff.h @@ -282,7 +282,8 @@ struct sk_buff { nfctinfo:3; __u8pkt_type:3, fclone:2, - ipvs_property:1; + ipvs_property:1, + emerg:1; __be16 protocol; void(*destructor)(struct sk_buff *skb); @@ -327,10 +328,13 @@ struct sk_buff { #include asm/system.h +#define SKB_ALLOC_FCLONE 0x01 +#define SKB_ALLOC_RX 0x02
Re: [PATCH 0/4] VM deadlock prevention -v5
On Fri, 25 Aug 2006, Peter Zijlstra wrote: The basic premise is that network sockets serving the VM need undisturbed functionality in the face of severe memory shortage. This patch-set provides the framework to provide this. Hmmm.. Is it not possible to avoid the memory pools by guaranteeing that a certain number of pages is easily reclaimable? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/7] [DCCP]: Fixes and enhancements
On 8/24/06, Ian McDonald [EMAIL PROTECTED] wrote: I spent all of today on USAGI's IPSEC/MIPV6 patches and related issues, so I'll look into this tomorrow. Thanks Ian. Yes I saw that. Take your time as this is nowhere near as important! Sigh, I'm still busy indeed, gave a quick look at the series and up to the 10th patch it's OK at first sight, should improve the current situation, thanks. - Arnaldo - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/4] VM deadlock prevention -v5
On Fri, 2006-08-25 at 08:51 -0700, Christoph Lameter wrote: On Fri, 25 Aug 2006, Peter Zijlstra wrote: The basic premise is that network sockets serving the VM need undisturbed functionality in the face of severe memory shortage. This patch-set provides the framework to provide this. Hmmm.. Is it not possible to avoid the memory pools by guaranteeing that a certain number of pages is easily reclaimable? We're not actually using mempools, but the memalloc reserve. Purely easily reclaimable memory is not enough however, since packet receive happens from IRQ context, and we cannot unmap pages in IRQ context. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/4] VM deadlock prevention -v5
Christoph Lameter wrote: On Fri, 25 Aug 2006, Peter Zijlstra wrote: The basic premise is that network sockets serving the VM need undisturbed functionality in the face of severe memory shortage. This patch-set provides the framework to provide this. Hmmm.. Is it not possible to avoid the memory pools by guaranteeing that a certain number of pages is easily reclaimable? No. You need to guarantee that the memory is not gobbled up by another subsystem, but remains available for use by *this* subsystem. Otherwise you could still deadlock. -- What is important? What you want to be true, or what is true? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PATCH] IPv6 Updates for net-2.6.19
* YOSHIFUJI Hideaki / ?$B5HF#1QL@ [EMAIL PROTECTED] 2006-08-25 17:21 commit 10204d532f5f8bb379009ba0bee2113bafda72be Author: YOSHIFUJI Hideaki [EMAIL PROTECTED] Date: Mon Aug 21 19:22:01 2006 +0900 [IPV6] ROUTE: Routing by FWMARK. Based on patch by Jean Lorchat [EMAIL PROTECTED]. Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED] diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 19a82b6..2987549 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -34,7 +34,7 @@ enum FRA_UNUSED3, FRA_UNUSED4, FRA_UNUSED5, - FRA_FWMARK, /* netfilter mark (IPv4) */ + FRA_FWMARK, /* netfilter mark (IPv4/IPv6) */ FRA_FLOW, /* flow/class id */ FRA_UNUSED6, FRA_UNUSED7, You're missing the validation policy entry for FRA_FWMARK in the IPv6 policy. diff --git a/include/net/flow.h b/include/net/flow.h index e052291..3ca210e 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -26,6 +26,7 @@ struct flowi { struct { struct in6_addr daddr; struct in6_addr saddr; + __u32 fwmark; __u32 flowlabel; } ip6_u; Since all flowi users now use fwmark it can be moved out of the union. diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 91f6233..aebd9e2 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -26,6 +26,9 @@ struct fib6_rule struct fib_rule common; struct rt6key src; struct rt6key dst; +#ifdef CONFIG_IPV6_ROUTE_FWMARK + u8 fwmark; +#endif u8 tclass; }; This doesn't look right. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/3] [IPV6] Policy Routing Updates
* YOSHIFUJI Hideaki / 吉藤英明 [EMAIL PROTECTED] 2006-08-26 00:08 Hello. Here are some IPv6 policy routing fixes on top of the net-2.6.19 tree. [PATCH 1/3] [IPV6] ROUTE: Fix FWMARK support. [PATCH 2/3] [IPV6] ROUTE: Fix size of fib6_rule_policy. If we accept Patrick's IPv4 fwmask patch, here's the one for IPv6. [PATCH 3/3] [IPV6] ROUTE: Add support for fwmask in routing rules. Haven't noticed them in time, ignore my comments on the previous patches. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[IPV6] Q: corrupt checksums when transferring data
I'm enabling e1000 to offload IPv6 since the 2.6.18+ kernels support it. The kernel I'm testing is 2.6.18-rc4. Everything with the hardware offload is working fine, but it appears that the GSO code may not correctly segment frames sometimes for IPv6 traffic. I did a tcpdump on both ends with all hardware offloading disabled through ethtool. Here is what I got, note the long frame and then the retransmit. Has this problem been addressed already? I'll compile and test a couple newer kernels, any suggested target patches or kernels would be appreciated. Sender: === 15:56:28.769034 bk1-6.33541 lh2-6.12865: S 3200244805:3200244805(0) win 5760 mss 1440,sackOK,timestamp 64767859 0,nop,wscale 7 15:56:28.769042 lh2-6.12865 bk1-6.33541: S 1558653050:1558653050(0) ack 3200244806 win 5712 mss 1440,sackOK,timestamp 172654320 64767859,nop,wscale 7 15:56:28.769102 bk1-6.33541 lh2-6.12865: . ack 1 win 45 nop,nop,timestamp 64767859 172654320 15:56:28.769350 bk1-6.33541 lh2-6.12865: P 1:257(256) ack 1 win 45 nop,nop,timestamp 64767859 172654320 15:56:28.769381 lh2-6.12865 bk1-6.33541: . ack 257 win 53 nop,nop,timestamp 172654320 64767859 15:56:28.769731 lh2-6.12865 bk1-6.33541: P 1:257(256) ack 257 win 53 nop,nop,timestamp 172654320 64767859 15:56:28.769851 bk1-6.33541 lh2-6.12865: . ack 257 win 54 nop,nop,timestamp 64767860 172654320 15:56:28.769860 bk1-6.46315 lh2-6.35704: S 3205139672:3205139672(0) win 5760 mss 1440,sackOK,timestamp 64767860 0,nop,wscale 7 15:56:28.769873 lh2-6.35704 bk1-6.46315: S 1557432368:1557432368(0) ack 3205139673 win 5712 mss 1440,sackOK,timestamp 172654320 64767860,nop,wscale 7 15:56:28.769975 bk1-6.46315 lh2-6.35704: . ack 1 win 45 nop,nop,timestamp 64767860 172654320 15:56:28.770009 lh2-6.35704 bk1-6.46315: . 1:2857(2856) ack 1 win 45 nop,nop,timestamp 172654320 64767860 15:56:28.972354 lh2-6.35704 bk1-6.46315: . 1:1429(1428) ack 1 win 45 nop,nop,timestamp 172654371 64767860 15:56:28.972478 bk1-6.46315 lh2-6.35704: . 
ack 1429 win 68 nop,nop,timestamp 64767910 172654371 15:56:28.972493 lh2-6.35704 bk1-6.46315: . 1429:2857(1428) ack 1 win 45 nop,nop,timestamp 172654371 64767910 15:56:28.972602 bk1-6.46315 lh2-6.35704: . ack 2857 win 90 nop,nop,timestamp 64767910 172654371 15:56:28.972611 lh2-6.35704 bk1-6.46315: . 2857:4285(1428) ack 1 win 45 nop,nop,timestamp 172654371 64767910 15:56:28.972727 bk1-6.46315 lh2-6.35704: . ack 4285 win 112 nop,nop,timestamp 64767910 172654371 15:56:28.972735 lh2-6.35704 bk1-6.46315: . 4285:5713(1428) ack 1 win 45 nop,nop,timestamp 172654371 64767910 15:56:28.972742 lh2-6.35704 bk1-6.46315: . 5713:7141(1428) ack 1 win 45 nop,nop,timestamp 172654371 64767910 15:56:28.972853 bk1-6.46315 lh2-6.35704: . ack 5713 win 135 nop,nop,timestamp 64767910 172654371 15:56:28.972862 lh2-6.35704 bk1-6.46315: . 7141:8569(1428) ack 1 win 45 nop,nop,timestamp 172654371 64767910 15:56:28.972868 lh2-6.35704 bk1-6.46315: . 8569:9997(1428) ack 1 win 45 nop,nop,timestamp 172654371 64767910 Receiver: = 15:56:28.764058 bk1-6.33541 lh2-6.12865: S 3200244805:3200244805(0) win 5760 mss 1440,sackOK,timestamp 64767859 0,nop,wscale 7 15:56:28.764181 lh2-6.12865 bk1-6.33541: S 1558653050:1558653050(0) ack 3200244806 win 5712 mss 1440,sackOK,timestamp 172654320 64767859,nop,wscale 7 15:56:28.764205 bk1-6.33541 lh2-6.12865: . ack 1 win 45 nop,nop,timestamp 64767859 172654320 15:56:28.764441 bk1-6.33541 lh2-6.12865: P 1:257(256) ack 1 win 45 nop,nop,timestamp 64767859 172654320 15:56:28.764552 lh2-6.12865 bk1-6.33541: . ack 257 win 53 nop,nop,timestamp 172654320 64767859 15:56:28.764926 lh2-6.12865 bk1-6.33541: P 1:257(256) ack 257 win 53 nop,nop,timestamp 172654320 64767859 15:56:28.764936 bk1-6.33541 lh2-6.12865: . 
ack 257 win 54 nop,nop,timestamp 64767860 172654320 15:56:28.764962 bk1-6.46315 lh2-6.35704: S 3205139672:3205139672(0) win 5760 mss 1440,sackOK,timestamp 64767860 0,nop,wscale 7 15:56:28.765052 lh2-6.35704 bk1-6.46315: S 1557432368:1557432368(0) ack 3205139673 win 5712 mss 1440,sackOK,timestamp 172654320 64767860,nop,wscale 7 15:56:28.765061 bk1-6.46315 lh2-6.35704: . ack 1 win 45 nop,nop,timestamp 64767860 172654320 15:56:28.765300 lh2-6.35704 bk1-6.46315: . 1:1429(1428) ack 1 win 45 nop,nop,timestamp 172654320 64767860 15:56:28.765306 lh2-6.35704 bk1-6.46315: . 1429:2857(1428) ack 1 win 45 nop,nop,timestamp 172654320 64767860 15:56:28.967565 lh2-6.35704 bk1-6.46315: . 1:1429(1428) ack 1 win 45 nop,nop,timestamp 172654371 64767860 15:56:28.967581 bk1-6.46315 lh2-6.35704: . ack 1429 win 68 nop,nop,timestamp 64767910 172654371 15:56:28.967691 lh2-6.35704 bk1-6.46315: . 1429:2857(1428) ack 1 win 45 nop,nop,timestamp 172654371 64767910 15:56:28.967702 bk1-6.46315 lh2-6.35704: . ack 2857 win 90 nop,nop,timestamp 64767910 172654371 15:56:28.967816 lh2-6.35704 bk1-6.46315: . 2857:4285(1428) ack 1 win 45 nop,nop,timestamp 172654371 64767910 15:56:28.967826 bk1-6.46315 lh2-6.35704: . ack 4285 win 112 nop,nop,timestamp 64767910
Re: [PATCH 2.6.17 2/9] NetXen: Hardware access routines
On Mon, 2006-08-21 at 07:03 -0700, Stephen Hemminger wrote: On Mon, 21 Aug 2006 13:57:23 +0530 Amit S. Kale [EMAIL PROTECTED] wrote: We can certainly create a table for all error messages. It'll hurt readability of code in many of the other places where printks are used to indicate some hardware error. -Amit My suggestion was intended as a way to handle multiple driver versions all using the same firmware or vice versa. By locking the firmware and driver version together you might make maintenance more difficult. We misunderstood your earlier comment. The compatible driver firmware images have the same major minor version numbers. Only the sub-version numbers may be different. This gives us more flexibility in releasing driver firmware fixes. Sanjeev. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 01/3] d80211: add support for SIOCSIWRATE, SIOCSIWTXPOW and SIOCSIWPOWER
On Mon, Aug 21, 2006 at 05:30:22PM -0700, Mohamed Abbas wrote: the attached patch will add support to handle these iw_handle SIOC[S/G]IWRATE, SIOC[S/G]IWTXPOW and SIOC[S/G]IWPOWER. It also added some changes in ieee80211_ioctl_giwrange function to report supported channels and rates. a call to ieee80211_hw_config is needed to inform the low level driver about these changes, I guess we might need to add a flag to indicate which parameters were changed so the low level driver does not need to make extra calls. Could you please separate SIOCSIWRATE from the rest. I did not go through the details yet, but I do not think the proposed change here would match the way rate control was designed in the Devicescape stack and I would not like to see this getting in before more careful review and explanation of how this is expected to work. The way I see rate settings working is that the control would be applied to the rate control algorithm and not to the list of rates itself. -- Jouni Malinen PGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RTL8136
In case you don't yet have an lspci dump for an RTL8136, here's one for a device which is working with the r1000 driver which is supplied with Ubuntu dapper (though the machine in question - a Toshiba Equium A110-233 - is actually running Debian testing.) I _should_ be able to test your patches once I've got a locally-compiled kernel working for it. [M-F-T set; not subscribed.] -- | Darren Salt| linux or ds at | nr. Ashington, | Toon | RISC OS, Linux | youmustbejoking,demon,co,uk | Northumberland | Army | + At least 4000 million too many people. POPULATION LEVEL IS UNSUSTAINABLE. If no one uses it, there's a reason. 05:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. Unknown device 8136 (rev 01) Subsystem: Toshiba America Info Systems Unknown device ff00 Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- Latency: 0, Cache Line Size: 64 bytes Interrupt: pin A routed to IRQ 185 Region 0: I/O ports at 4000 [size=256] Region 2: Memory at da00 (64-bit, non-prefetchable) [size=4K] [virtual] Expansion ROM at d400 [disabled] [size=64K] Capabilities: [40] Power Management version 2 Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=375mA PME(D0-,D1+,D2+,D3hot+,D3cold+) Status: D0 PME-Enable- DSel=0 DScale=0 PME+ Capabilities: [48] Vital Product Data Capabilities: [50] Message Signalled Interrupts: 64bit+ Queue=0/1 Enable- Address: Data: Capabilities: [60] Express Endpoint IRQ 0 Device: Supported: MaxPayload 128 bytes, PhantFunc 0, ExtTag+ Device: Latency L0s 1us, L1 unlimited Device: AtnBtn+ AtnInd+ PwrInd+ Device: Errors: Correctable- Non-Fatal- Fatal- Unsupported- Device: RlxdOrd- ExtTag- PhantFunc- AuxPwr- NoSnoop- Device: MaxPayload 128 bytes, MaxReadReq 128 bytes Link: Supported Speed 2.5Gb/s, Width x1, ASPM L0s, Port 0 Link: Latency L0s unlimited, L1 unlimited Link: ASPM Disabled RCB 64 bytes CommClk+ ExtSynch- Link: Speed 2.5Gb/s, Width x1 
Capabilities: [84] Vendor Specific Information Capabilities: [100] Advanced Error Reporting Capabilities: [12c] Virtual Channel Capabilities: [148] Device Serial Number 36-81-ec-10-00-00-10-01 Capabilities: [154] Power Budgeting
Re: [PATCH 01/3] d80211: add support for SIOCSIWRATE, SIOCSIWTXPOW and SIOCSIWPOWER
I will separate each pair of S/G in a separate patch so it will be easier to discuss each set separately. I will provide the patch this weekend taking into account the comments provided. Thanks, Mohamed Jouni Malinen wrote: On Mon, Aug 21, 2006 at 05:30:22PM -0700, Mohamed Abbas wrote: the attached patch will add support to handle these iw_handle SIOC[S/G]IWRATE, SIOC[S/G]IWTXPOW and SIOC[S/G]IWPOWER. It also added some changes in ieee80211_ioctl_giwrange function to report supported channels and rates. a call to ieee80211_hw_config is needed to inform the low level driver about these changes, I guess we might need to add a flag to indicate which parameters were changed so the low level driver does not need to make extra calls. Could you please separate SIOCSIWRATE from the rest. I did not go through the details yet, but I do not think the proposed change here would match the way rate control was designed in the Devicescape stack and I would not like to see this getting in before more careful review and explanation of how this is expected to work. The way I see rate settings working is that the control would be applied to the rate control algorithm and not to the list of rates itself. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [IPV6] Q: corrupt checksums when transferring data
On Fri, 25 Aug 2006 11:13:48 -0700 Brandeburg, Jesse [EMAIL PROTECTED] wrote: I'm enabling e1000 to offload IPv6 since the 2.6.18+ kernels support it. The kernel I'm testing is 2.6.18-rc4. Yes, something is wrong with the GSO code. I am bisecting this bug http://bugzilla.kernel.org/show_bug.cgi?id=7050 It looks like GSO is handing an IPV6 segment down to the sky2 driver even though it asks for only NETIF_F_TSO. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: RTL8136
Darren Salt [EMAIL PROTECTED] : In case you don't yet have an lspci dump for an RTL8136, here's one for a device which is working with the r1000 driver which is supplied with Ubuntu dapper (though the machine in question - a Toshiba Equium A110-233 - is actually running Debian testing.) Thanks. The MM region was correctly guessed. If the driver does not work, you can try to s/RTL_CFG_1/RTL_CFG_2/ for the 0x8136 entry in the rtl8169_pci_tbl array. -- Ueimor - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PATCH] IPv6 Updates for net-2.6.19
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c9f74c1..9b50e0c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -703,6 +703,7 @@ void ip6_route_input(struct sk_buff *skb .ip6_u = { .daddr = iph-daddr, .saddr = iph-saddr, + .fwmark = skb-nfmark, .flowlabel = (* (u32 *) iph)IPV6_FLOWINFO_MASK, }, }, I can't build the latest 2.6.19-git with this patch, skb-nfmark requires CONFIG_NETFILTER, which isn't in my .config. The obvious workaround is the patch below, but that might not be what you want. Can send my .config if you need it. -Brian diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9b50e0c..dc880cc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -703,7 +703,9 @@ void ip6_route_input(struct sk_buff *skb .ip6_u = { .daddr = iph-daddr, .saddr = iph-saddr, +#ifdef CONFIG_NETFILTER .fwmark = skb-nfmark, +#endif .flowlabel = (* (u32 *) iph)IPV6_FLOWINFO_MASK, }, },
RE: [IPV6] Q: corrupt checksums when transferring data
Stephen Hemminger wrote: On Fri, 25 Aug 2006 11:13:48 -0700 Brandeburg, Jesse [EMAIL PROTECTED] wrote: I'm enabling e1000 to offload IPv6 since the 2.6.18+ kernels support it. The kernel I'm testing is 2.6.18-rc4. Yes, something is wrong with the GSO code. I am bisecting this bug http://bugzilla.kernel.org/show_bug.cgi?id=7050 It looks like GSO is handing an IPV6 segment down to the sky2 driver even though it asks for only NETIF_F_TSO. Ah ha, I was wondering if that bug report on sky2 might be related to this issue. E1000 actually sends the data I think (it just has a bad checksum) when handed a too long frame. Seems like the stack should never give us something longer than the MTU + enet header, esp with all hardware offloads disabled. So I have a very easy repro with netperf on remote: netserver -4 -6 netperf -H lh2-6,6 -t TCP_MAERTS -- -m4K -S128K -s128K The remote will generate the bad frames. Jesse - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Fw: [Bugme-new] [Bug 7058] New: CONFIG_IP_ROUTE_FWMARK breaks rp_filter checks
Begin forwarded message: Date: Fri, 25 Aug 2006 13:29:52 -0700 From: [EMAIL PROTECTED] To: [EMAIL PROTECTED] Subject: [Bugme-new] [Bug 7058] New: CONFIG_IP_ROUTE_FWMARK breaks rp_filter checks http://bugzilla.kernel.org/show_bug.cgi?id=7058 Summary: CONFIG_IP_ROUTE_FWMARK breaks rp_filter checks Kernel Version: 2.6.17.8 Status: NEW Severity: normal Owner: [EMAIL PROTECTED] Submitter: [EMAIL PROTECTED] Most recent kernel where this bug did not occur: bug present since at least early 2.4.x Distribution: debian Hardware Environment: i386 Software Environment: router/firewall Problem Description: Using a fwmark as a key for selecting among multiple routing tables (via ip rule command) breaks the rp_filter functionality since the fwmark field is not initialized in function fib_validate_source. Because of this there is no way to assure that outgoing and incoming packets use the same routing table. Steps to reproduce: You should set up a network environment where there are at least two different links from the machine A to a remote host B, and firewall rules on A to mark specific packets to this destination and back (say those destined to a certain port only and corresponding replies). Set a default route from A to B using link 1 and a different routing table for marked packets using link 2 (e.g. ip rule add fwmark 2 table 2; ip route table 2 add ...). (This is the setup used for a VPN I manage) When an incoming packet from link 2 arrives in fib_validate_source, the fwmark field will not be set despite the presence of appropriate rules in the firewall, and thus the wrong table will be used for the check causing the packet to be refused. I have prepared a small patch to resolve this issue. I've tested it for quite some time and it worked flawlessly and without side effects. I'm pasting it here: it just adds an argument to fib_validate_source so that it can set the fwmark field and passes the proper value already present in every caller. 
--- a/include/net/ip_fib.h 2006-08-09 20:08:14.0 +0200 +++ b/include/net/ip_fib.h 2006-08-09 19:44:44.0 +0200 @@ -234,7 +234,7 @@ extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb); -extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, +extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, u32 fwmark, struct net_device *dev, u32 *spec_dst, u32 *itag); extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); --- a/net/ipv4/fib_frontend.c 2006-08-07 06:18:54.0 +0200 +++ b/net/ipv4/fib_frontend.c 2006-08-09 19:43:45.0 +0200 @@ -160,14 +160,18 @@ - check, that packet arrived from expected physical interface. */ -int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, +int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, u32 fwmark, struct net_device *dev, u32 *spec_dst, u32 *itag) { struct in_device *in_dev; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = src, .saddr = dst, - .tos = tos } }, + .tos = tos, +#ifdef CONFIG_IP_ROUTE_FWMARK + .fwmark = fwmark +#endif + } }, .iif = oif }; struct fib_result res; int no_addr, rpf; --- a/net/ipv4/route.c 2006-08-09 20:08:47.0 +0200 +++ b/net/ipv4/route.c 2006-08-09 19:46:06.0 +0200 @@ -1606,6 +1606,11 @@ goto e_inval; spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); } else if (fib_validate_source(saddr, 0, tos, 0, +#ifdef CONFIG_IP_ROUTE_FWMARK + skb-nfmark, +#else + 0, /* no fwmark dependant routing */ +#endif dev, spec_dst, itag) 0) goto e_inval; @@ -1720,6 +1725,11 @@ err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), +#ifdef CONFIG_IP_ROUTE_FWMARK + skb-nfmark, +#else + 0, /* no fwmark dependant routing */ +#endif in_dev-dev, spec_dst, itag); if (err 0) { ip_handle_martian_source(in_dev-dev, in_dev, skb, daddr, @@ -1954,6 +1964,11 @@ int result; result = 
fib_validate_source(saddr, daddr, tos, loopback_dev.ifindex, +#ifdef CONFIG_IP_ROUTE_FWMARK +
[patch] d80211: fix crash in ieee80211_rx_michael_mic_report()
This fixes a crash at ieee80211.c line 3461, ieee80211_rx_michael_mic_report() (rx-sdata-type == IEEE80211_IF_TYPE_AP). rx.sdata needs to be set before calling ieee80211_rx_michael_mic_report(). Signed-off-by: Elliot Schwartz [EMAIL PROTECTED] Signed-off-by: David Kimdon [EMAIL PROTECTED] Index: wireless-dev/net/d80211/ieee80211.c === --- wireless-dev.orig/net/d80211/ieee80211.c +++ wireless-dev/net/d80211/ieee80211.c @@ -3582,6 +3582,11 @@ void __ieee80211_rx(struct net_device *d else sta = rx.sta = NULL; + if (sta) { + rx.dev = sta-dev; + rx.sdata = IEEE80211_DEV_TO_SUB_IF(rx.dev); + } + if ((status-flag RX_FLAG_MMIC_ERROR)) { ieee80211_rx_michael_mic_report(dev, hdr, sta, rx); goto end; @@ -3597,8 +3602,6 @@ void __ieee80211_rx(struct net_device *d if (sta !sta-assoc_ap !(sta-flags WLAN_STA_WDS) !local-iff_promiscs !multicast) { - rx.dev = sta-dev; - rx.sdata = IEEE80211_DEV_TO_SUB_IF(rx.dev); rx.u.rx.ra_match = 1; ieee80211_invoke_rx_handlers(local, local-rx_handlers, rx, sta); -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [stable] [PATCH] bridge-netfilter: don't overwrite memory outside of skb
On Tue, Aug 22, 2006 at 05:19:28PM -0700, Stephen Hemminger wrote: The bridge netfilter code needs to check for space at the front of the skb before overwriting; otherwise if skb from device doesn't have headroom, then it will cause random memory corruption. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] Queued to -stable, thanks. greg k-h - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
IPSec kernel oops on ppc64
I installed 2.6.17 + patch-2.6.18-rc4 + 2.6.18-rc4-mm2 onto two pSeries power 5 (ppc64 lpars) machines. I configured IPSec using the configuration listed below. A ping from one machine to the other, hangs. No packets leave the machine issuing the ping. When I tried sftp, I received following oops. Has anyone else had problems with IPSec on pSeries? [EMAIL PROTECTED] jml]# sftp hvracer1 Connecting to hvracer1... kernel BUG in skb_to_sgvec at net/xfrm/xfrm_algo.c:620! cpu 0x0: Vector: 700 (Program Check) at [c000466eb240] pc: c035f2f4: .skb_to_sgvec+0x288/0x2ec lr: d09605e0: .esp_output+0x340/0x494 [esp4] sp: c000466eb4c0 msr: 80029032 current = 0xc00045a69910 paca= 0xc0484400 pid = 2213, comm = ssh kernel BUG in skb_to_sgvec at net/xfrm/xfrm_algo.c:620! enter ? for help 0:mon t [c000466eb590] d09605e0 .esp_output+0x340/0x494 [esp4] [c000466eb680] c0357bd4 .xfrm4_output_finish2+0x2b8/0x3d0 [c000466eb720] c0357ea0 .xfrm4_output+0x74/0x88 [c000466eb7a0] c031b188 .ip_queue_xmit+0x4a8/0x540 [c000466eb8a0] c032e9b8 .tcp_transmit_skb+0x820/0x890 [c000466eb960] c0331b74 .tcp_connect+0x308/0x3b0 [c000466eba00] c03361d0 .tcp_v4_connect+0x52c/0x6c0 [c000466ebb80] c0344664 .inet_stream_connect+0x10c/0x358 [c000466ebc60] c02dba14 .sys_connect+0xd8/0x120 [c000466ebd90] c02fe420 .compat_sys_socketcall+0xdc/0x214 [c000466ebe30] c000871c syscall_exit+0x0/0x40 --- Exception: c00 (System Call) at 07a9f8fc SP (fc63f230) is in userspace Configured IPSec as follows: add x.x.x.55 x.x.x.206 esp 35590 -m transport -E 3des-cbc 06183223c23a21e8b36c566b -A hmac-md5 TAHITEST89ABCDEF; add x.x.x.206 x.x.x.55 esp 12360 -m transport -E 3des-cbc 06183223c23a21e8b36c566b -A hmac-md5 TAHITEST89ABCDEF; spdadd x.x.x.55 x.x.x.206 any -P in ipsec esp/transport//require; spdadd x.x.x.206 x.x.x.55 any -P out ipsec esp/transport//require; Same config on both machines, except for spdadd entry. The in and out are swapped on the other machine. 
Regards, Joy Latten - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH -rt DO NOT APPLY] Fix for tg3 networking lockup
Michael Chan mchan at broadcom.com writes: Turning off ASF is just a matter of changing some bits in NVRAM and recalculating the checksum. If you need the tool to do this, I'll have someone send it to you. Note that on some of the blade servers, I believe ASF is vital and should not be disabled. Still, it would be great if ASF could be disabled, because I have noticed that when ASF is enabled, the tg3 driver automatically disables TSO (TCP Segmentation Offloading). Here is a dmesg output from a server where I am seeing that behavior: eth0: Tigon3 [partno(BCM95704A6) rev 2100 PHY(5704)] (PCIX:133MHz:64-bit) \ 10/100/1000BaseT Ethernet 00:30:48:59:c4:94 eth0: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[1] Split[0] WireSpeed[1] TSOcap[0] [...] eth1: Tigon3 [partno(BCM95704A6) rev 2100 PHY(5704)] (PCIX:133MHz:64-bit) \ 10/100/1000BaseT Ethernet 00:30:48:59:c4:95 eth1: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[0] Split[0] WireSpeed[1] TSOcap[1] Both interfaces are fundamentally TSO-capable, but since ASF is enabled on eth0, tg3 disables TSO on this interface. Of course at this point it is not even possible to use ethtool to re-enable it because the driver considers eth0 as not TSO-capable at all. As far as I know, the tg3 driver has been doing that since one of your patches shipped with 2.6.11-rc2-bk3, Michael, see [1]. Here is the relevant code snippet (line numbers are for 2.6.16): 10835 if (tp-tg3_flags2 TG3_FLG2_HW_TSO) { 10836 tp-tg3_flags2 |= TG3_FLG2_TSO_CAPABLE; 10837 } 10838 else if (GET_ASIC_REV(tp-pci_chip_rev_id) == ASIC_REV_5700 || 10839 GET_ASIC_REV(tp-pci_chip_rev_id) == ASIC_REV_5701 || 10840 tp-pci_chip_rev_id == CHIPREV_ID_5705_A0 || 10841 (tp-tg3_flags TG3_FLAG_ENABLE_ASF) != 0) { 10842 tp-tg3_flags2 = ~TG3_FLG2_TSO_CAPABLE; 10843 } else { 10844 tp-tg3_flags2 |= TG3_FLG2_TSO_CAPABLE; 10845 } The culprit is line 10841. Why is that done ? 
[1] ftp://ftp.us.kernel.org:/pub/linux/kernel/v2.6/snapshots/old/ patch-2.6.11-rc2-bk3.log, patch-2.6.11-rc2-bk3.bz2 -- Marc Bevand - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PATCH] IPv6 Updates for net-2.6.19
In article [EMAIL PROTECTED] (at Fri, 25 Aug 2006 15:29:25 -0400), Brian Haley [EMAIL PROTECTED] says: .saddr = iph->saddr, + .fwmark = skb->nfmark, .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, : I can't build the latest 2.6.19-git with this patch, skb->nfmark requires CONFIG_NETFILTER, which isn't in my .config. The obvious workaround is the patch below, but that might not be what you want. Can send my .config if you need it. Your fix is appropriate. Acked-by: YOSHIFUJI Hideaki [EMAIL PROTECTED] --yoshfuji - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3] [IPV6] ROUTE: Fix FWMARK support.
From: YOSHIFUJI Hideaki [EMAIL PROTECTED] Date: Sat, 26 Aug 2006 00:08:54 +0900 (JST) [IPV6] ROUTE: Fix FWMARK support. - Add missing nla_policy entry. - type of fwmark is u32, not u8. Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED] Applied, thanks. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] [IPV6] ROUTE: Fix size of fib6_rule_policy.
From: YOSHIFUJI Hideaki [EMAIL PROTECTED] Date: Sat, 26 Aug 2006 00:09:19 +0900 (JST) [IPV6] ROUTE: Fix size of fib6_rule_policy. It should not be RTA_MAX+1 but FRA_MAX+1. Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED] Applied. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] [IPV6] ROUTE: Add support for fwmask in routing rules.
From: YOSHIFUJI Hideaki [EMAIL PROTECTED] Date: Sat, 26 Aug 2006 00:09:37 +0900 (JST) [IPV6] ROUTE: Add support for fwmask in routing rules. Add support for fwmark masks. A mask of 0xFFFFFFFF is used when a mark value != 0 is sent without a mask. Based on patch for net/ipv4/fib_rules.c by Patrick McHardy [EMAIL PROTECTED]. Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED] Applied. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PATCH] IPv6 Updates for net-2.6.19
From: YOSHIFUJI Hideaki [EMAIL PROTECTED] Date: Sat, 26 Aug 2006 07:44:38 +0900 (JST) In article [EMAIL PROTECTED] (at Fri, 25 Aug 2006 15:29:25 -0400), Brian Haley [EMAIL PROTECTED] says: .saddr = iph->saddr, + .fwmark = skb->nfmark, .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, : I can't build the latest 2.6.19-git with this patch, skb->nfmark requires CONFIG_NETFILTER, which isn't in my .config. The obvious workaround is the patch below, but that might not be what you want. Can send my .config if you need it. Your fix is appropriate. Acked-by: YOSHIFUJI Hideaki [EMAIL PROTECTED] It seems to make better sense to protect this with IPV6_ROUTE_FWMARK instead of NETFILTER. And it is consistent with ipv4 side. So that's how I will fix this build problem. Thanks. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PATCH] IPv6 Updates for net-2.6.19
In article [EMAIL PROTECTED] (at Fri, 25 Aug 2006 16:06:58 -0700 (PDT)), David Miller [EMAIL PROTECTED] says: + .fwmark = skb->nfmark, : It seems to make better sense to protect this with IPV6_ROUTE_FWMARK instead of NETFILTER. And it is consistent with ipv4 side. So that's how I will fix this build problem. Agreed. --yoshfuji - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [IPV4]: Add support for fwmark masks in routing rules
From: Patrick McHardy [EMAIL PROTECTED] Date: Fri, 25 Aug 2006 14:14:05 +0200 [IPV4]: Add support for fwmark masks in routing rules Add a FRA_FWMASK attribute for fwmark masks. For compatibility a mask of 0xFFFFFFFF is used when a mark value != 0 is sent without a mask. Signed-off-by: Patrick McHardy [EMAIL PROTECTED] Applied. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [DECNET]: Add support for fwmark masks in routing rules
From: Patrick McHardy [EMAIL PROTECTED] Date: Fri, 25 Aug 2006 14:14:12 +0200 [DECNET]: Add support for fwmark masks in routing rules Add support for fwmark masks. For compatibility a mask of 0xFFFFFFFF is used when a mark value != 0 is sent without a mask. Signed-off-by: Patrick McHardy [EMAIL PROTECTED] Applied. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [NET_SCHED]: Add mask support to fwmark classifier
From: Patrick McHardy [EMAIL PROTECTED] Date: Fri, 25 Aug 2006 14:14:18 +0200 [NET_SCHED]: Add mask support to fwmark classifier Support masking the nfmark value before the search. The mask value is global for all filters contained in one instance. It can only be set when a new instance is created, all filters must specify the same mask. Signed-off-by: Patrick McHardy [EMAIL PROTECTED] Applied, thanks a lot. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: IPSec kernel oops on ppc64
On Fri, 25 Aug 2006, Joy Latten wrote: I installed 2.6.17 + patch-2.6.18-rc4 + 2.6.18-rc4-mm2 onto two pSeries power 5 (ppc64 lpars) machines. I configured IPSec using the configuration listed below. Confirming that this does not crash on i686 or x86_64. - james -- James Morris [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: IPSec kernel oops on ppc64
From: James Morris [EMAIL PROTECTED] Date: Fri, 25 Aug 2006 19:15:57 -0400 (EDT) On Fri, 25 Aug 2006, Joy Latten wrote: I installed 2.6.17 + patch-2.6.18-rc4 + 2.6.18-rc4-mm2 onto two pSeries power 5 (ppc64 lpars) machines. I configured IPSec using the configuration listed below. Confirming that this does not crash on i686 or x86_64. Probably best to start with retesting with 2.6.18-rc4, and if that crashes too it is time to think seriously about a miscompile on ppc64. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] nfs: deadlock prevention for NFS
On Fri, 2006-08-25 at 16:14 -0400, Trond Myklebust wrote: Grumble... If your patches are targetting NFS, could you please at the very least Cc [EMAIL PROTECTED] and/or myself. Sorry, will make sure you're on the CC list next round. On Fri, 2006-08-25 at 17:40 +0200, Peter Zijlstra wrote: Provide a proper a_ops-swapfile() implementation for NFS. This will set the NFS socket to SOCK_VMIO and put the socket reconnection under PF_MEMALLOC (I hope this is enough, otherwise more work needs to be done). Signed-off-by: Peter Zijlstra [EMAIL PROTECTED] --- fs/nfs/file.c | 21 - include/linux/sunrpc/xprt.h |4 +++- net/sunrpc/xprtsock.c | 16 3 files changed, 39 insertions(+), 2 deletions(-) Index: linux-2.6/fs/nfs/file.c === --- linux-2.6.orig/fs/nfs/file.c +++ linux-2.6/fs/nfs/file.c @@ -27,6 +27,7 @@ #include linux/slab.h #include linux/pagemap.h #include linux/smp_lock.h +#include net/sock.h #include asm/uaccess.h #include asm/system.h @@ -317,7 +318,25 @@ static int nfs_release_page(struct page static int nfs_swapfile(struct address_space *mapping, int enable) { - return 0; + int err = -EINVAL; + struct rpc_clnt *client = NFS_CLIENT(mapping-host); + struct sock *sk = client-cl_xprt-inet; + + if (enable) { + client-cl_xprt-swapper = 1; + /* +* keep one extra sock reference so the reserve won't dip +* when the socket gets reconnected. +*/ + sk_adjust_memalloc(1, 1); + err = sk_set_vmio(sk); + } else if (client-cl_xprt-swapper) { + client-cl_xprt-swapper = 0; + sk_adjust_memalloc(-1, -1); + err = sk_clear_vmio(sk); + } + + return err; } This all belongs in net/sunrpc/xprtsock.c. The NFS code has no business screwing around with the internals of the sunrpc transport. Ok, I'll make a function there, and call that. 
const struct address_space_operations nfs_file_aops = { Index: linux-2.6/net/sunrpc/xprtsock.c === --- linux-2.6.orig/net/sunrpc/xprtsock.c +++ linux-2.6/net/sunrpc/xprtsock.c @@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void * { struct rpc_xprt *xprt = (struct rpc_xprt *) args; struct socket *sock = xprt-sock; + unsigned long pflags = current-flags; int err, status = -EIO; if (xprt-shutdown || xprt-addr.sin_port == 0) @@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void * dprintk(RPC: xs_udp_connect_worker for xprt %p\n, xprt); + if (xprt-swapper) + current-flags |= PF_MEMALLOC; + /* Start by resetting any existing state */ xs_close(xprt); @@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void * xprt-sock = sock; xprt-inet = sk; + if (xprt-swapper) + sk_set_vmio(sk); + write_unlock_bh(sk-sk_callback_lock); } xs_udp_do_set_buffer_size(xprt); @@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void * out: xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + current-flags = pflags; } /* @@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void * { struct rpc_xprt *xprt = (struct rpc_xprt *)args; struct socket *sock = xprt-sock; + unsigned long pflags = current-flags; int err, status = -EIO; if (xprt-shutdown || xprt-addr.sin_port == 0) goto out; + if (xprt-swapper) + current-flags |= PF_MEMALLOC; + dprintk(RPC: xs_tcp_connect_worker for xprt %p\n, xprt); if (!xprt-sock) { @@ -1170,10 +1182,14 @@ static void xs_tcp_connect_worker(void * break; } } + + if (xprt-swapper) + sk_set_vmio(xprt-inet); out: xprt_wake_pending_tasks(xprt, status); out_clear: xprt_clear_connecting(xprt); + current-flags = pflags; } How does this guarantee that the socket reconnection won't fail? I was afraid this might not be enough, I really have to go through the network code. Also, what about the case of rpc_malloc()? Can't that cause rpciod to deadlock when you add NFS swap into the equation? 
I will have to plead ignorance for now, I'll look into this on monday. On first glance it looks like rpc_malloc could use an |__GFP_EMERG for RPC_TASK_SWAPPER. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] nfs: deadlock prevention for NFS
Grumble... If your patches are targetting NFS, could you please at the very least Cc [EMAIL PROTECTED] and/or myself. On Fri, 2006-08-25 at 17:40 +0200, Peter Zijlstra wrote: Provide a proper a_ops-swapfile() implementation for NFS. This will set the NFS socket to SOCK_VMIO and put the socket reconnection under PF_MEMALLOC (I hope this is enough, otherwise more work needs to be done). Signed-off-by: Peter Zijlstra [EMAIL PROTECTED] --- fs/nfs/file.c | 21 - include/linux/sunrpc/xprt.h |4 +++- net/sunrpc/xprtsock.c | 16 3 files changed, 39 insertions(+), 2 deletions(-) Index: linux-2.6/fs/nfs/file.c === --- linux-2.6.orig/fs/nfs/file.c +++ linux-2.6/fs/nfs/file.c @@ -27,6 +27,7 @@ #include linux/slab.h #include linux/pagemap.h #include linux/smp_lock.h +#include net/sock.h #include asm/uaccess.h #include asm/system.h @@ -317,7 +318,25 @@ static int nfs_release_page(struct page static int nfs_swapfile(struct address_space *mapping, int enable) { - return 0; + int err = -EINVAL; + struct rpc_clnt *client = NFS_CLIENT(mapping-host); + struct sock *sk = client-cl_xprt-inet; + + if (enable) { + client-cl_xprt-swapper = 1; + /* + * keep one extra sock reference so the reserve won't dip + * when the socket gets reconnected. + */ + sk_adjust_memalloc(1, 1); + err = sk_set_vmio(sk); + } else if (client-cl_xprt-swapper) { + client-cl_xprt-swapper = 0; + sk_adjust_memalloc(-1, -1); + err = sk_clear_vmio(sk); + } + + return err; } This all belongs in net/sunrpc/xprtsock.c. The NFS code has no business screwing around with the internals of the sunrpc transport. 
const struct address_space_operations nfs_file_aops = { Index: linux-2.6/net/sunrpc/xprtsock.c === --- linux-2.6.orig/net/sunrpc/xprtsock.c +++ linux-2.6/net/sunrpc/xprtsock.c @@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void * { struct rpc_xprt *xprt = (struct rpc_xprt *) args; struct socket *sock = xprt-sock; + unsigned long pflags = current-flags; int err, status = -EIO; if (xprt-shutdown || xprt-addr.sin_port == 0) @@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void * dprintk(RPC: xs_udp_connect_worker for xprt %p\n, xprt); + if (xprt-swapper) + current-flags |= PF_MEMALLOC; + /* Start by resetting any existing state */ xs_close(xprt); @@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void * xprt-sock = sock; xprt-inet = sk; + if (xprt-swapper) + sk_set_vmio(sk); + write_unlock_bh(sk-sk_callback_lock); } xs_udp_do_set_buffer_size(xprt); @@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void * out: xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + current-flags = pflags; } /* @@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void * { struct rpc_xprt *xprt = (struct rpc_xprt *)args; struct socket *sock = xprt-sock; + unsigned long pflags = current-flags; int err, status = -EIO; if (xprt-shutdown || xprt-addr.sin_port == 0) goto out; + if (xprt-swapper) + current-flags |= PF_MEMALLOC; + dprintk(RPC: xs_tcp_connect_worker for xprt %p\n, xprt); if (!xprt-sock) { @@ -1170,10 +1182,14 @@ static void xs_tcp_connect_worker(void * break; } } + + if (xprt-swapper) + sk_set_vmio(xprt-inet); out: xprt_wake_pending_tasks(xprt, status); out_clear: xprt_clear_connecting(xprt); + current-flags = pflags; } How does this guarantee that the socket reconnection won't fail? Also, what about the case of rpc_malloc()? Can't that cause rpciod to deadlock when you add NFS swap into the equation? 
Cheers, Trond - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] IPV6 : segmentation offload not set correctly on TCP children
TCP over IPV6 would incorrectly inherit the GSO settings. This would cause the kernel to send TCP Segmentation Offload packets for IPV6 data to devices that can't handle it. It caused the sky2 driver to lock http://bugzilla.kernel.org/show_bug.cgi?id=7050 and the e1000 would generate bogus packets. I can't blame the hardware for gagging if the upper layers feed it garbage. This was a new bug in 2.6.18 introduced with GSO support. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- linux-2.6.orig/net/ipv6/tcp_ipv6.c 2006-08-03 09:09:16.0 -0700 +++ linux-2.6/net/ipv6/tcp_ipv6.c 2006-08-25 15:30:31.0 -0700 @@ -944,7 +944,7 @@ * comment in that function for the gory details. -acme */ - sk->sk_gso_type = SKB_GSO_TCPV6; + newsk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(newsk, dst, NULL); newtcp6sk = (struct tcp6_sock *)newsk; - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] IPV6 : segmentation offload not set correctly on TCP children
From: Stephen Hemminger [EMAIL PROTECTED] Date: Fri, 25 Aug 2006 15:43:53 -0700 TCP over IPV6 would incorrectly inherit the GSO settings. This would cause kernel to send Tcp Segmentation Offload packets for IPV6 data to devices that can't handle it. It caused the sky2 driver to lock http://bugzilla.kernel.org/show_bug.cgi?id=7050 and the e1000 would generate bogus packets. I can't blame the hardware for gagging if the upper layers feed it garbage. This was a new bug in 2.6.18 introduced with GSO support. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] Good catch. Applied, thanks Stephen. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [2.6.17.8] NFS stall / BUG in UDP fragment processing / SKB trimming
On Sun, Aug 13, 2006 at 10:59:11PM +1000, Herbert Xu wrote: On Sat, Aug 12, 2006 at 09:19:19PM +, Nix wrote: The kernel log showed a heap of BUGs from somewhere inside the skb management layer, somewhere in UDP fragment processing while handling NFS requests. It starts like this: Aug 12 21:31:08 hades warning: kernel: BUG: warning at include/linux/skbuff.h:975/__skb_trim() Aug 12 21:31:08 hades warning: kernel: c030ed39 ip_append_data+0x5b3/0x951 c030fc18 ip_generic_getfrag+0x0/0x96 Oops, I missed this code path when I disallowed skb_trim from operating on a paged skb. This patch should fix the problem. Greg, we need this for 2.6.17 stable as well if Dave is OK with it. This patch doesn't apply at all to the latest 2.6.17-stable kernel tree. Care to rediff it? thanks, greg k-h - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] IPV6 : segmentation offload not set correctly on TCP children
Hello Stephen, thanks for the fix, it fixes the problem for me. I closed the bug. On which hardware did you reproduce the bug and how did you find it? Did you use git bisect? Thomas - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [IPV6] Q: corrupt checksums when transferring data
Stephen Hemminger wrote: I think this the problem. Does it fix e1000? I am testing now. TCP over IPV6 would incorrectly inherit the GSO settings on accepted children. --- linux-2.6.orig/net/ipv6/tcp_ipv6.c2006-08-03 09:09:16.0 -0700 +++ linux-2.6/net/ipv6/tcp_ipv6.c 2006-08-25 15:30:31.0 -0700 @@ -944,7 +944,7 @@ * comment in that function for the gory details. -acme */ - sk-sk_gso_type = SKB_GSO_TCPV6; + newsk-sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(newsk, dst, NULL); newtcp6sk = (struct tcp6_sock *)newsk; ah, no more errors, I didn't go through and validate much more past that. I'm now able to do hardware offloads with no errors. I think it's a good patch, at least it makes sense to me and works for me. Thanks! Jesse - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] tcp_bic: use BUILD_BUG_ON
Please fix the other variants as well htcp, cubic, veno, vegas, ... Just one patch is necessary with all the files. -- Stephen Hemminger [EMAIL PROTECTED] All non-trivial abstractions, to some degree, are leaky. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] ethtool v4: add autoneg advertise feature
On Thu, 24 Aug 2006, Michael Chan wrote: Jeff Kirsher wrote: The old way of setting autonegotiation was using the following command: ethtool -s ethx speed 100 duplex full auto on now the command would be ethtool -s ethx auto on advertise 0x08 both commands would result in only advertising 100 FULL. There still needs to be a change made to the man file to reflect the change in the behavior of ethtool, which I have not done. But this patch will allow for greater flexibility in setting autonegotiation speeds. It is more flexible, but less intuitive. The user now has to remember hex values instead of the more intuitive speed and duplex. Perhaps we can keep the old method of using speed and duplex, while adding the new method of specifying hex values? I agree. Something like: ethtool -s ethx auto on advertise mode1+mode2+...+moden For example: ethtool -s ethx auto on advertise 100-half+100-full to set speed 100 either half or full duplex. Maybe have some abbreviations such as 100-all (same as above) or all-half (for all supported half duplex) or just all (for all supported modes), which I suppose is the default. Just an idea. -Bill - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH -rt DO NOT APPLY] Fix for tg3 networking lockup
On 8/25/06, Michael Chan [EMAIL PROTECTED] wrote: The reason is that TSO on 5704 and older chips is done by firmware. ASF is also implemented by firmware. If ASF is enabled, there is no room to do TSO and ASF at the same time. Just for test purpose, I have applied the following patch to my tg3.c. I now seem to be able to enable/disable TSO, but I admit don't know whether ASF is still functional or not. else if (GET_ASIC_REV(tp-pci_chip_rev_id) == ASIC_REV_5700 || GET_ASIC_REV(tp-pci_chip_rev_id) == ASIC_REV_5701 || - tp-pci_chip_rev_id == CHIPREV_ID_5705_A0 || - (tp-tg3_flags TG3_FLAG_ENABLE_ASF) != 0) { + tp-pci_chip_rev_id == CHIPREV_ID_5705_A0) { tp-tg3_flags2 = ~TG3_FLG2_TSO_CAPABLE; } else { Then tg3 considered my interface as TSO-capable (TSOcap[1] in dmesg). TSO was still disabled by default, which is normal because there is this other check a couple of lines below: /* TSO is on by default on chips that support hardware TSO. * Firmware TSO on older chips gives lower performance, so it * is off by default, but can be enabled using ethtool. */ if (tp-tg3_flags2 TG3_FLG2_HW_TSO) dev-features |= NETIF_F_TSO; But I was able to turn TSO on via ethtool -K. This is exactly the behavior I would like to see in tg3. So are you saying the patch I applied actually breaks ASF ? Firmware-based TSO is actually slower than no TSO. The only benefit is a little better CPU utilization. I know, in one of my test-cases, firmware TSO reduces the max achievable TCP bandwidth from 930 to 840 Mbit/s on a GigE network while reducing the CPU utilization from 44% to 22%. I think firmware TSO still makes sense in some cases. -- Marc Bevand - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Congestion control (modulo lp, bic): use BUILD_BUG_ON
Signed-off-by: Alexey Dobriyan [EMAIL PROTECTED] --- net/ipv4/tcp_cubic.c |2 +- net/ipv4/tcp_highspeed.c |2 +- net/ipv4/tcp_htcp.c |2 +- net/ipv4/tcp_hybla.c |2 +- net/ipv4/tcp_vegas.c |2 +- net/ipv4/tcp_veno.c |2 +- net/ipv4/tcp_westwood.c |2 +- 7 files changed, 7 insertions(+), 7 deletions(-) --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -358,7 +358,7 @@ static struct tcp_congestion_ops cubictc static int __init cubictcp_register(void) { - BUG_ON(sizeof(struct bictcp) ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct bictcp) ICSK_CA_PRIV_SIZE); /* Precompute a bunch of the scaling factors that are used per-packet * based on SRTT of 100ms --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -189,7 +189,7 @@ static struct tcp_congestion_ops tcp_hig static int __init hstcp_register(void) { - BUG_ON(sizeof(struct hstcp) ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct hstcp) ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(tcp_highspeed); } --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -286,7 +286,7 @@ static struct tcp_congestion_ops htcp = static int __init htcp_register(void) { - BUG_ON(sizeof(struct htcp) ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct htcp) ICSK_CA_PRIV_SIZE); BUILD_BUG_ON(BETA_MIN = BETA_MAX); return tcp_register_congestion_control(htcp); } --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -170,7 +170,7 @@ static struct tcp_congestion_ops tcp_hyb static int __init hybla_register(void) { - BUG_ON(sizeof(struct hybla) ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct hybla) ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(tcp_hybla); } --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -370,7 +370,7 @@ static struct tcp_congestion_ops tcp_veg static int __init tcp_vegas_register(void) { - BUG_ON(sizeof(struct vegas) ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct vegas) ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(tcp_vegas); return 0; } --- a/net/ipv4/tcp_veno.c +++ 
b/net/ipv4/tcp_veno.c @@ -213,7 +213,7 @@ static struct tcp_congestion_ops tcp_ven static int __init tcp_veno_register(void) { - BUG_ON(sizeof(struct veno) ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct veno) ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(tcp_veno); return 0; } --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -289,7 +289,7 @@ static struct tcp_congestion_ops tcp_wes static int __init tcp_westwood_register(void) { - BUG_ON(sizeof(struct westwood) ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct westwood) ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(tcp_westwood); } - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html