date:20060825

[1/1] connector: add userspace example code into Documentation/connector/

2006-08-25 Thread Evgeniy Polyakov

Hello.

I was asked several times to include userspace example code into
Documentation, so if there is no policy against it, consider attached patch 
for 2.6.18. This program works with included Documentation/connector/cn_test.c 
connector module.
Thank you.

Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]

--- /dev/null   2006-08-23 17:09:03.438578500 +0400
+++ ./Documentation/connector/ucon.c2006-08-25 11:06:57.0 +0400
@@ -0,0 +1,205 @@
+/*
+ * ucon.c
+ *
+ * Copyright (c) 2004+ Evgeniy Polyakov [EMAIL PROTECTED]
+ * 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include asm/types.h
+
+#include sys/types.h
+#include sys/socket.h
+#include sys/poll.h
+
+#include linux/netlink.h
+#include linux/rtnetlink.h
+
+#include arpa/inet.h
+
+#include stdio.h
+#include stdlib.h
+#include unistd.h
+#include string.h
+#include errno.h
+#include time.h
+
+#include linux/connector.h
+
+#define DEBUG
+#define NETLINK_CONNECTOR  11
+
+#ifdef DEBUG
+#define ulog(f, a...) fprintf(stdout, f, ##a)
+#else
+#define ulog(f, a...) do {} while (0)
+#endif
+
+static int need_exit;
+static __u32 seq;
+
+static int netlink_send(int s, struct cn_msg *msg)
+{
+   struct nlmsghdr *nlh;
+   unsigned int size;
+   int err;
+   char buf[128];
+   struct cn_msg *m;
+   
+   size = NLMSG_SPACE(sizeof(struct cn_msg) + msg-len);
+
+   nlh = (struct nlmsghdr *)buf;
+   nlh-nlmsg_seq = seq++;
+   nlh-nlmsg_pid = getpid();
+   nlh-nlmsg_type = NLMSG_DONE;
+   nlh-nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh));
+   nlh-nlmsg_flags = 0;
+
+   m = NLMSG_DATA(nlh);
+#if 0
+   ulog(%s: [%08x.%08x] len=%u, seq=%u, ack=%u.\n,
+  __func__, msg-id.idx, msg-id.val, msg-len, msg-seq, 
msg-ack);
+#endif
+   memcpy(m, msg, sizeof(*m) + msg-len);
+
+   err = send(s, nlh, size, 0);
+   if (err == -1)
+   ulog(Failed to send: %s [%d].\n,
+   strerror(errno), errno);
+
+   return err;
+}
+
+int main(int argc, char *argv[])
+{
+   int s;
+   char buf[1024];
+   int len;
+   struct nlmsghdr *reply;
+   struct sockaddr_nl l_local;
+   struct cn_msg *data;
+   FILE *out;
+   time_t tm;
+   struct pollfd pfd;
+
+   if (argc  2)
+   out = stdout;
+   else {
+   out = fopen(argv[1], a+);
+   if (!out) {
+   ulog(Unable to open %s for writing: %s\n,
+   argv[1], strerror(errno));
+   out = stdout;
+   }
+   }
+
+   memset(buf, 0, sizeof(buf));
+
+   s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+   if (s == -1) {
+   perror(socket);
+   return -1;
+   }
+
+   l_local.nl_family = AF_NETLINK;
+   l_local.nl_groups = 0x123;
+   l_local.nl_pid = 0;
+
+   if (bind(s, (struct sockaddr *)l_local, sizeof(struct sockaddr_nl)) == 
-1) {
+   perror(bind);
+   close(s);
+   return -1;
+   }
+
+   {
+   int on = l_local.nl_groups;
+   setsockopt(s, 270, 1, on, sizeof(on));
+   }
+   
+   if (0) {
+   int i, j;
+   
+   memset(buf, 0, sizeof(buf));
+   
+   data = (struct cn_msg *)buf;
+
+   data-id.idx = 0x123;
+   data-id.val = 0x456;
+   data-seq = seq++;
+   data-ack = 0;
+   data-len = 0;
+
+   for (j=0; j10; ++j) {
+   for (i=0; i1000; ++i) {
+   len = netlink_send(s, data);
+   }
+
+   ulog(%d messages have been sent to %08x.%08x.\n, i, 
data-id.idx, data-id.val);
+   }
+
+   return 0;
+   }
+   
+
+   pfd.fd = s;
+
+   while (!need_exit) {
+   pfd.events = POLLIN;
+   pfd.revents = 0;
+   switch (poll(pfd, 1, -1)) {
+   case 0:
+   need_exit = 1;
+   break;
+   case -1:
+   if (errno != EINTR) {
+   need_exit

Re: [1/1] connector: add userspace example code into Documentation/connector/

2006-08-25 Thread Patrick McHardy

Evgeniy Polyakov wrote:
 Hello.
 
 I was asked several times to include userspace example code into
 Documentation, so if there is no policy against it, consider attached patch 
 for 2.6.18. This program works with included 
 Documentation/connector/cn_test.c 
 connector module.

 + l_local.nl_family = AF_NETLINK;
 + l_local.nl_groups = 0x123;
 + l_local.nl_pid = 0;
 +
 + if (bind(s, (struct sockaddr *)l_local, sizeof(struct sockaddr_nl)) == 
 -1) {
 + perror(bind);
 + close(s);
 + return -1;
 + }
 +
 + {
 + int on = l_local.nl_groups;
 + setsockopt(s, 270, 1, on, sizeof(on));
 + }

Example code shouldn't use magic numbers, please use the proper defines.
And the code is wrong, using the same group number for bind (which takes
a bitmask) and setsockopt (which takes a group number) doesn't work.
It's not necessary to use setsockopt if you already used bind anyway.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [1/1] connector: add userspace example code into Documentation/connector/

2006-08-25 Thread Evgeniy Polyakov

On Fri, Aug 25, 2006 at 08:57:23AM +0200, Patrick McHardy ([EMAIL PROTECTED]) 
wrote:
 Evgeniy Polyakov wrote:
  Hello.
  
  I was asked several times to include userspace example code into
  Documentation, so if there is no policy against it, consider attached patch 
  for 2.6.18. This program works with included 
  Documentation/connector/cn_test.c 
  connector module.
 
  +   l_local.nl_family = AF_NETLINK;
  +   l_local.nl_groups = 0x123;
  +   l_local.nl_pid = 0;
  +
  +   if (bind(s, (struct sockaddr *)l_local, sizeof(struct sockaddr_nl)) == 
  -1) {
  +   perror(bind);
  +   close(s);
  +   return -1;
  +   }
  +
  +   {
  +   int on = l_local.nl_groups;
  +   setsockopt(s, 270, 1, on, sizeof(on));
  +   }
 
 Example code shouldn't use magic numbers, please use the proper defines.
 And the code is wrong, using the same group number for bind (which takes
 a bitmask) and setsockopt (which takes a group number) doesn't work.
 It's not necessary to use setsockopt if you already used bind anyway.

I put there explicit socket option to show how it works in case there
will be several group requests, which will not be placed into initial
bind call.
But you are right, that magic numbers are not that good.
I will update program with appropriate changes.
Thank you.

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [1/1] connector: add userspace example code into Documentation/connector/

2006-08-25 Thread Evgeniy Polyakov

Hello.

I was asked several times to include userspace example code into
Documentation, so if there is no policy against it, consider attached patch 
for 2.6.18. This program works with included Documentation/connector/cn_test.c 
connector module.
Thank you.

Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]

--- /dev/null   2006-08-23 17:09:03.438578500 +0400
+++ ./Documentation/connector/ucon.c2006-08-25 11:31:48.0 +0400
@@ -0,0 +1,206 @@
+/*
+ * ucon.c
+ *
+ * Copyright (c) 2004+ Evgeniy Polyakov [EMAIL PROTECTED]
+ * 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include asm/types.h
+
+#include sys/types.h
+#include sys/socket.h
+#include sys/poll.h
+
+#include linux/netlink.h
+#include linux/rtnetlink.h
+
+#include arpa/inet.h
+
+#include stdio.h
+#include stdlib.h
+#include unistd.h
+#include string.h
+#include errno.h
+#include time.h
+
+#include linux/connector.h
+
+#define DEBUG
+#define NETLINK_CONNECTOR  11
+
+#ifdef DEBUG
+#define ulog(f, a...) fprintf(stdout, f, ##a)
+#else
+#define ulog(f, a...) do {} while (0)
+#endif
+
+static int need_exit;
+static __u32 seq;
+
+static int netlink_send(int s, struct cn_msg *msg)
+{
+   struct nlmsghdr *nlh;
+   unsigned int size;
+   int err;
+   char buf[128];
+   struct cn_msg *m;
+   
+   size = NLMSG_SPACE(sizeof(struct cn_msg) + msg-len);
+
+   nlh = (struct nlmsghdr *)buf;
+   nlh-nlmsg_seq = seq++;
+   nlh-nlmsg_pid = getpid();
+   nlh-nlmsg_type = NLMSG_DONE;
+   nlh-nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh));
+   nlh-nlmsg_flags = 0;
+
+   m = NLMSG_DATA(nlh);
+#if 0
+   ulog(%s: [%08x.%08x] len=%u, seq=%u, ack=%u.\n,
+  __func__, msg-id.idx, msg-id.val, msg-len, msg-seq, 
msg-ack);
+#endif
+   memcpy(m, msg, sizeof(*m) + msg-len);
+
+   err = send(s, nlh, size, 0);
+   if (err == -1)
+   ulog(Failed to send: %s [%d].\n,
+   strerror(errno), errno);
+
+   return err;
+}
+
+int main(int argc, char *argv[])
+{
+   int s;
+   char buf[1024];
+   int len;
+   struct nlmsghdr *reply;
+   struct sockaddr_nl l_local;
+   struct cn_msg *data;
+   FILE *out;
+   time_t tm;
+   struct pollfd pfd;
+
+   if (argc  2)
+   out = stdout;
+   else {
+   out = fopen(argv[1], a+);
+   if (!out) {
+   ulog(Unable to open %s for writing: %s\n,
+   argv[1], strerror(errno));
+   out = stdout;
+   }
+   }
+
+   memset(buf, 0, sizeof(buf));
+
+   s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+   if (s == -1) {
+   perror(socket);
+   return -1;
+   }
+
+   l_local.nl_family = AF_NETLINK;
+   l_local.nl_groups = 0x123; /* bitmask of requested groups */
+   l_local.nl_pid = 0;
+
+   if (bind(s, (struct sockaddr *)l_local, sizeof(struct sockaddr_nl)) == 
-1) {
+   perror(bind);
+   close(s);
+   return -1;
+   }
+
+#if 0
+   {
+   int on = 0x57; /* Additional group number */
+   setsockopt(s, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, on, 
sizeof(on));
+   }
+#endif 
+   if (0) {
+   int i, j;
+   
+   memset(buf, 0, sizeof(buf));
+   
+   data = (struct cn_msg *)buf;
+
+   data-id.idx = 0x123;
+   data-id.val = 0x456;
+   data-seq = seq++;
+   data-ack = 0;
+   data-len = 0;
+
+   for (j=0; j10; ++j) {
+   for (i=0; i1000; ++i) {
+   len = netlink_send(s, data);
+   }
+
+   ulog(%d messages have been sent to %08x.%08x.\n, i, 
data-id.idx, data-id.val);
+   }
+
+   return 0;
+   }
+   
+
+   pfd.fd = s;
+
+   while (!need_exit) {
+   pfd.events = POLLIN;
+   pfd.revents = 0;
+   switch (poll(pfd, 1, -1)) {
+   case 0:
+   need_exit = 1;
+   break;
+   case -1:
+

Re: [1/1] connector: add userspace example code into Documentation/connector/

2006-08-25 Thread Patrick McHardy

Evgeniy Polyakov wrote:
 + l_local.nl_family = AF_NETLINK;
 + l_local.nl_groups = 0x123; /* bitmask of requested groups */
 + l_local.nl_pid = 0;
 +
 + if (bind(s, (struct sockaddr *)l_local, sizeof(struct sockaddr_nl)) == 
 -1) {
 + perror(bind);
 + close(s);
 + return -1;
 + }
 +
 +#if 0
 + {
 + int on = 0x57; /* Additional group number */
 + setsockopt(s, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, on, 
 sizeof(on));
 + }
 +#endif   
 

That looks better, thanks.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[GIT PATCH] IPv6 Updates for net-2.6.19

2006-08-25 Thread YOSHIFUJI Hideaki / 吉藤英明

Hello.

Please pull
  git://git.skbuff.net/gitroot/yoshfuji/net-2.6.19-20060825-inet6
for the following updates on top of the net-2.6.19 tree.

Regards,

HEADLINES
-

[IPV6] MIP6: Several obvious clean-ups.
[IPV6] ROUTE: Routing by Traffic Class.
[IPV6] ROUTE: Routing by FWMARK.
[NET]: Add common helper functions to convert IPv6/IPv4 address string to 
network address structure.
[NETFILTER] NF_CONNTRACK_FTP: Use in6_pton() to convert address string.

DIFFSTAT


 include/linux/fib_rules.h|2 
 include/linux/inet.h |2 
 include/net/flow.h   |2 
 net/core/utils.c |  215 ++
 net/ipv6/Kconfig |7 +
 net/ipv6/ah6.c   |   45 +---
 net/ipv6/exthdrs.c   |1 
 net/ipv6/fib6_rules.c|   26 +
 net/ipv6/mip6.c  |6 +
 net/ipv6/route.c |1 
 net/netfilter/nf_conntrack_ftp.c |   96 +
 11 files changed, 268 insertions(+), 135 deletions(-)

CHANGESETS
--

commit 6dabb77fd82cd927727d5fb8136eff2e123910f5
Author: YOSHIFUJI Hideaki [EMAIL PROTECTED]
Date:   Thu Aug 24 23:18:12 2006 +0900

[IPV6] MIP6: Several obvious clean-ups.

- Remove redundant code.  Pointed out by Brian Haley [EMAIL PROTECTED].
- Unify code paths with/without CONFIG_IPV6_MIP.
- Use NIP6_FMT for IPv6 address textual presentation.
- Fold long line.  Pointed out by David Miller [EMAIL PROTECTED].

Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED]

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 164546b..9b007eb 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -128,9 +128,7 @@ static void ipv6_rearrange_destopt(struc
off += optlen;
len -= optlen;
}
-   if (len == 0)
-   return;
-
+   /* Note: ok if len == 0 */
 bad:
return;
 }
@@ -175,11 +173,7 @@ static void ipv6_rearrange_rthdr(struct 
ipv6_addr_copy(iph-daddr, final_addr);
 }
 
-#ifdef CONFIG_IPV6_MIP6
 static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
-#else
-static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
-#endif
 {
union {
struct ipv6hdr *iph;
@@ -194,30 +188,12 @@ #endif
 
while (exthdr.raw  end) {
switch (nexthdr) {
-#ifdef CONFIG_IPV6_MIP6
-   case NEXTHDR_HOP:
-   if (!zero_out_mutable_opts(exthdr.opth)) {
-   LIMIT_NETDEBUG(
-   KERN_WARNING overrun %sopts\n,
-   nexthdr == NEXTHDR_HOP ?
-   hop : dest);
-   return -EINVAL;
-   }
-   break;
case NEXTHDR_DEST:
+#ifdef CONFIG_IPV6_MIP6
if (dir == XFRM_POLICY_OUT)
ipv6_rearrange_destopt(iph, exthdr.opth);
-   if (!zero_out_mutable_opts(exthdr.opth)) {
-   LIMIT_NETDEBUG(
-   KERN_WARNING overrun %sopts\n,
-   nexthdr == NEXTHDR_HOP ?
-   hop : dest);
-   return -EINVAL;
-   }
-   break;
-#else
+#endif
case NEXTHDR_HOP:
-   case NEXTHDR_DEST:
if (!zero_out_mutable_opts(exthdr.opth)) {
LIMIT_NETDEBUG(
KERN_WARNING overrun %sopts\n,
@@ -226,7 +202,6 @@ #else
return -EINVAL;
}
break;
-#endif
 
case NEXTHDR_ROUTING:
ipv6_rearrange_rthdr(iph, exthdr.rth);
@@ -282,16 +257,13 @@ #endif
}
 #ifdef CONFIG_IPV6_MIP6
memcpy(tmp_ext, top_iph-saddr, extlen);
-   err = ipv6_clear_mutable_options(top_iph,
-extlen - sizeof(*tmp_ext) +
-sizeof(*top_iph),
-XFRM_POLICY_OUT);
 #else
memcpy(tmp_ext, top_iph-daddr, extlen);
+#endif
err = ipv6_clear_mutable_options(top_iph,
 extlen - sizeof(*tmp_ext) +
-sizeof(*top_iph));
-#endif
+sizeof(*top_iph),
+XFRM_POLICY_OUT);
if (err)
goto error_free_iph;
}
@@ -382,13 +354,8 @@ static int ah6_input(struct xfrm_state *
if (!tmp_hdr

Re: [PATCH] tcp_lp: use BUILD_BUG_ON

2006-08-25 Thread David Miller

From: Alexey Dobriyan [EMAIL PROTECTED]
Date: Fri, 25 Aug 2006 05:08:31 +0400

 Signed-off-by: Alexey Dobriyan [EMAIL PROTECTED]

Applied.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] tcp_bic: use BUILD_BUG_ON

2006-08-25 Thread David Miller

From: Alexey Dobriyan [EMAIL PROTECTED]
Date: Fri, 25 Aug 2006 05:08:02 +0400

 Signed-off-by: Alexey Dobriyan [EMAIL PROTECTED]

Applied.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [1/1] connector: add userspace example code into Documentation/connector/

2006-08-25 Thread David Miller

From: Evgeniy Polyakov [EMAIL PROTECTED]
Date: Fri, 25 Aug 2006 11:11:02 +0400

 Hello.

 I was asked several times to include userspace example code into
 Documentation, so if there is no policy against it, consider attached patch 
 for 2.6.18. This program works with included 
 Documentation/connector/cn_test.c 
 connector module.
 Thank you.

 Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]

Fair enough, applied (after killing all of the trailing whitespace).

Thanks.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [1/1] connector: add userspace example code into Documentation/connector/

2006-08-25 Thread Evgeniy Polyakov

On Fri, Aug 25, 2006 at 12:52:33AM -0700, David Miller ([EMAIL PROTECTED]) 
wrote:
 From: Evgeniy Polyakov [EMAIL PROTECTED]
 Date: Fri, 25 Aug 2006 11:11:02 +0400

  Hello.

  I was asked several times to include userspace example code into
  Documentation, so if there is no policy against it, consider attached patch 
  for 2.6.18. This program works with included 
  Documentation/connector/cn_test.c 
  connector module.
  Thank you.

  Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]

 Fair enough, applied (after killing all of the trailing whitespace).

I mailed it myself and applied to 2.6.18 git tree - patch -p1 did not
complain for sure :)

Thank you.

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [1/1] connector: add userspace example code into Documentation/connector/

2006-08-25 Thread David Miller

From: Evgeniy Polyakov [EMAIL PROTECTED]
Date: Fri, 25 Aug 2006 12:15:03 +0400

 I mailed it myself and applied to 2.6.18 git tree - patch -p1 did
 not complain for sure :)

GIT always complains very loudly about any trailing whitespace on any
lines, patch is too dumb to do that.

You do not need to use GIT trees to check this, just run:

git apply --check --whitespace=error-all $PATCH

and it will let you know.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [GIT PATCH] IPv6 Updates for net-2.6.19

2006-08-25 Thread YOSHIFUJI Hideaki / 吉藤英明

In article [EMAIL PROTECTED] (at Fri, 25 Aug 2006 03:00:34 +0900 (JST)), 
YOSHIFUJI Hideaki [EMAIL PROTECTED] says:

 Please pull
   git://git.skbuff.net/gitroot/yoshfuji/net-2.6.19-20060825-inet6

 +int in6_pton(const char *src, int srclen,
:
 + printf(srclen=%d\n, srclen);

(FYI, Dave has already pointed this out and fixed it up, but anyway,)
My fault... Here's the updated version.  It is available at the same URL.

I'll be more careful in the future.

HEADLINES
-

[IPV6] MIP6: Several obvious clean-ups.
[IPV6] ROUTE: Routing by Traffic Class.
[IPV6] ROUTE: Routing by FWMARK.
[NET]: Add common helper functions to convert IPv6/IPv4 address string to 
network address structure.
[NETFILTER] NF_CONNTRACK_FTP: Use in6_pton() to convert address string.

DIFFSTAT


 include/linux/fib_rules.h|2 
 include/linux/inet.h |2 
 include/net/flow.h   |2 
 net/core/utils.c |  213 ++
 net/ipv6/Kconfig |7 +
 net/ipv6/ah6.c   |   45 +---
 net/ipv6/exthdrs.c   |1 
 net/ipv6/fib6_rules.c|   26 +
 net/ipv6/mip6.c  |6 +
 net/ipv6/route.c |1 
 net/netfilter/nf_conntrack_ftp.c |   96 +
 11 files changed, 266 insertions(+), 135 deletions(-)

CHANGESETS
--

commit 6dabb77fd82cd927727d5fb8136eff2e123910f5
Author: YOSHIFUJI Hideaki [EMAIL PROTECTED]
Date:   Thu Aug 24 23:18:12 2006 +0900

[IPV6] MIP6: Several obvious clean-ups.

- Remove redundant code.  Pointed out by Brian Haley [EMAIL PROTECTED].
- Unify code paths with/without CONFIG_IPV6_MIP.
- Use NIP6_FMT for IPv6 address textual presentation.
- Fold long line.  Pointed out by David Miller [EMAIL PROTECTED].

Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED]

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 164546b..9b007eb 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -128,9 +128,7 @@ static void ipv6_rearrange_destopt(struc
off += optlen;
len -= optlen;
}
-   if (len == 0)
-   return;
-
+   /* Note: ok if len == 0 */
 bad:
return;
 }
@@ -175,11 +173,7 @@ static void ipv6_rearrange_rthdr(struct 
ipv6_addr_copy(iph-daddr, final_addr);
 }
 
-#ifdef CONFIG_IPV6_MIP6
 static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
-#else
-static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
-#endif
 {
union {
struct ipv6hdr *iph;
@@ -194,30 +188,12 @@ #endif
 
while (exthdr.raw  end) {
switch (nexthdr) {
-#ifdef CONFIG_IPV6_MIP6
-   case NEXTHDR_HOP:
-   if (!zero_out_mutable_opts(exthdr.opth)) {
-   LIMIT_NETDEBUG(
-   KERN_WARNING overrun %sopts\n,
-   nexthdr == NEXTHDR_HOP ?
-   hop : dest);
-   return -EINVAL;
-   }
-   break;
case NEXTHDR_DEST:
+#ifdef CONFIG_IPV6_MIP6
if (dir == XFRM_POLICY_OUT)
ipv6_rearrange_destopt(iph, exthdr.opth);
-   if (!zero_out_mutable_opts(exthdr.opth)) {
-   LIMIT_NETDEBUG(
-   KERN_WARNING overrun %sopts\n,
-   nexthdr == NEXTHDR_HOP ?
-   hop : dest);
-   return -EINVAL;
-   }
-   break;
-#else
+#endif
case NEXTHDR_HOP:
-   case NEXTHDR_DEST:
if (!zero_out_mutable_opts(exthdr.opth)) {
LIMIT_NETDEBUG(
KERN_WARNING overrun %sopts\n,
@@ -226,7 +202,6 @@ #else
return -EINVAL;
}
break;
-#endif
 
case NEXTHDR_ROUTING:
ipv6_rearrange_rthdr(iph, exthdr.rth);
@@ -282,16 +257,13 @@ #endif
}
 #ifdef CONFIG_IPV6_MIP6
memcpy(tmp_ext, top_iph-saddr, extlen);
-   err = ipv6_clear_mutable_options(top_iph,
-extlen - sizeof(*tmp_ext) +
-sizeof(*top_iph),
-XFRM_POLICY_OUT);
 #else
memcpy(tmp_ext, top_iph-daddr, extlen);
+#endif
err = ipv6_clear_mutable_options(top_iph,
 extlen - sizeof(*tmp_ext) +
-sizeof(*top_iph));
-#endif

Re: [1/1] connector: add userspace example code into Documentation/connector/

2006-08-25 Thread Evgeniy Polyakov

On Fri, Aug 25, 2006 at 01:17:27AM -0700, David Miller ([EMAIL PROTECTED]) 
wrote:
  I mailed it myself and applied to 2.6.18 git tree - patch -p1 did
  not complain for sure :)
 
 GIT always complains very loudly about any trailing whitespace on any
 lines, patch is too dumb to do that.
 
 You do not need to use GIT trees to check this, just run:
 
 git apply --check --whitespace=error-all $PATCH
 
 and it will let you know.

Hmm, how many interesting things git contains...
I will definitely use this feature, thanks David.

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/2]d80211: fix wpa_supplicant reassoc problem

2006-08-25 Thread Hong Liu

After key negotiation completed using wpa_supplicant, wpa_supplicant
can't reassoc with the AP if we reboot the AP. It always fails at the
4-way handshake.
The problem is the key info is not cleared correctly. Thus when
wpa_supplicant send the EAPOL-KEY packet, the d80211 stack finds the old
key and uses it to encrypt the packet.

The patch removes the sta_info when we disassociate with AP.

Thanks,
Hong
diff --git a/net/d80211/ieee80211_sta.c b/net/d80211/ieee80211_sta.c
index 8caf352..2144b34 100644
--- a/net/d80211/ieee80211_sta.c
+++ b/net/d80211/ieee80211_sta.c
@@ -739,6 +739,14 @@ static void ieee80211_associated(struct 
 		wireless_send_event(dev, SIOCGIWAP, wrqu, NULL);
 		mod_timer(ifsta-timer,
 			  jiffies + IEEE80211_MONITORING_INTERVAL + 30 * HZ);
+
+		sta = sta_info_get(local, ifsta-bssid);
+		if (sta) {
+			sta_info_free(sta, 0);
+			sta_info_put(sta);
+		}
+
+		ifsta-probereq_poll = 0;
 	} else {
 		mod_timer(ifsta-timer,
 			  jiffies + IEEE80211_MONITORING_INTERVAL);
diff --git a/net/d80211/sta_info.c b/net/d80211/sta_info.c
index 7f5febe..8902816 100644
--- a/net/d80211/sta_info.c
+++ b/net/d80211/sta_info.c
@@ -197,6 +197,12 @@ #ifdef CONFIG_D80211_VERBOSE_DEBUG
 	   local-mdev-name, MAC_ARG(sta-addr));
 #endif /* CONFIG_D80211_VERBOSE_DEBUG */
 
+	if (sta-key) {
+		ieee80211_key_sysfs_remove(sta-key);
+		ieee80211_key_free(sta-key);
+		sta-key = NULL;
+	}
+
 	rate_control_remove_sta_attrs(local, sta-rate_ctrl_priv, sta-kobj);
 	ieee80211_sta_sysfs_remove(sta);
 
@@ -244,8 +250,6 @@ void sta_info_free(struct sta_info *sta,
 kfree(key);
 			}
 		}
-		ieee80211_key_free(sta-key);
-		sta-key = NULL;
 	} else if (sta-key_idx_compression != HW_KEY_IDX_INVALID) {
 		struct ieee80211_key_conf conf;
 		memset(conf, 0, sizeof(conf));

[PATCH 2/2]d80211: add hardware scan callback

2006-08-25 Thread Hong Liu

Add hardware scan callback to support cards like ipw3945 which
implements the scan command in firmware.

Thanks,
Hong
diff --git a/include/net/d80211.h b/include/net/d80211.h
index ba5cb4c..b369d12 100644
--- a/include/net/d80211.h
+++ b/include/net/d80211.h
@@ -595,6 +595,10 @@ struct ieee80211_hw {
 int (*passive_scan)(struct net_device *dev, int state,
 struct ieee80211_scan_conf *conf);
 
+	/* Ask the hardware to service the scan request, no need to start
+	 * the scan state machine in stack. */
+	int (*hw_scan)(struct net_device *dev, u8 *ssid, size_t len);
+
 /* return low-level statistics */
 	int (*get_stats)(struct net_device *dev,
 			 struct ieee80211_low_level_stats *stats);
@@ -893,6 +897,8 @@ void ieee80211_tx_led(int state, struct 
  */
 void ieee80211_rx_led(int state, struct net_device *dev);
 
+/* set station scan completed */
+void ieee80211_set_scan_completed(struct net_device *dev);
 
 /* IEEE 802.11 defines */
 
diff --git a/net/d80211/ieee80211.c b/net/d80211/ieee80211.c
index 60eca90..dc920c1 100644
--- a/net/d80211/ieee80211.c
+++ b/net/d80211/ieee80211.c
@@ -4831,6 +4831,7 @@ EXPORT_SYMBOL(sta_info_get);
 EXPORT_SYMBOL(sta_info_put);
 EXPORT_SYMBOL(ieee80211_radar_status);
 EXPORT_SYMBOL(ieee80211_get_mc_list_item);
+EXPORT_SYMBOL(ieee80211_set_scan_completed);
 
 module_init(ieee80211_init);
 module_exit(ieee80211_exit);
diff --git a/net/d80211/ieee80211_sta.c b/net/d80211/ieee80211_sta.c
index 2144b34..4bb2234 100644
--- a/net/d80211/ieee80211_sta.c
+++ b/net/d80211/ieee80211_sta.c
@@ -2426,6 +2426,28 @@ static int ieee80211_active_scan(struct 
 }
 
 
+void ieee80211_set_scan_completed(struct net_device *dev)
+{
+	struct ieee80211_local *local = dev-ieee80211_ptr;
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	union iwreq_data wrqu;
+
+	printk(KERN_DEBUG %s: scan completed\n, dev-name);
+	local-sta_scanning = 0;
+	local-last_scan_completed = jiffies;
+
+	memset(wrqu, 0, sizeof(wrqu));
+	wireless_send_event(dev, SIOCGIWSCAN, wrqu, NULL);
+
+	if (sdata-type == IEEE80211_IF_TYPE_IBSS) {
+		struct ieee80211_if_sta *ifsta = sdata-u.sta;
+		if (!ifsta-bssid_set ||
+		(!ifsta-state == IEEE80211_IBSS_JOINED 
+		!ieee80211_sta_active_ibss(dev)))
+			ieee80211_sta_find_ibss(dev, ifsta);
+	}
+}
+
 static void ieee80211_sta_scan_work(void *ptr)
 {
 	struct net_device *dev = ptr;
@@ -2434,7 +2456,6 @@ static void ieee80211_sta_scan_work(void
 	struct ieee80211_hw_modes *mode;
 	struct ieee80211_channel *chan;
 	int skip;
-	union iwreq_data wrqu;
 	unsigned long next_delay = 0;
 
 	if (!local-sta_scanning)
@@ -2451,20 +2472,8 @@ static void ieee80211_sta_scan_work(void
    operational channel after scan\n,
    dev-name);
 			}
-			printk(KERN_DEBUG %s: scan completed\n, dev-name);
-			local-sta_scanning = 0;
-			local-last_scan_completed = jiffies;
-			memset(wrqu, 0, sizeof(wrqu));
-			wireless_send_event(dev, SIOCGIWSCAN, wrqu, NULL);
-			if (sdata-type == IEEE80211_IF_TYPE_IBSS) {
-struct ieee80211_sub_if_data *sdata =
-	IEEE80211_DEV_TO_SUB_IF(dev);
-struct ieee80211_if_sta *ifsta = sdata-u.sta;
-if (!ifsta-bssid_set ||
-(ifsta-state == IEEE80211_IBSS_JOINED 
- !ieee80211_sta_active_ibss(dev)))
-	ieee80211_sta_find_ibss(dev, ifsta);
-			}
+
+			ieee80211_set_scan_completed(dev);
 			return;
 		}
 		skip = !(local-enabled_modes  (1  mode-mode));
@@ -2565,9 +2574,12 @@ int ieee80211_sta_req_scan(struct net_de
 
 	printk(KERN_DEBUG %s: starting scan\n, dev-name);
 
+	local-sta_scanning = 1;
+	if (local-hw-hw_scan)
+		return local-hw-hw_scan(dev, ssid, ssid_len);
+
 	ieee80211_sta_save_oper_chan(dev);
 
-	local-sta_scanning = 1;
 	/* TODO: stop TX queue? */
 
 	if (ssid) {

Re: [RFC] add nl80211

2006-08-25 Thread Johannes Berg

On Thu, 2006-08-24 at 19:27 +0200, Thomas Graf wrote:

 I'd use normal u32 attributes here as well and simply
 enumerate their type 1..n.
 
   int idx = 1
   list_for_each_entry(drv, nl80211_drv_list, list)
   NLA_PUT_U32(msg, idx++, drv-wiphy);
 
 The additional header seems waste but this way you stay flexible
 and can extend the protocol later on. Attribute lengths are
 checked with an open end in mind, i.e. you can put more stuff
 behind that u32 in the future and your old applications will
 still work.
 
 You also might want to consider returning ifindex and
 the associated name.

That'd be a list of ifindexes again...

  +static int nl80211_get_intfs(struct sk_buff *skb, struct genl_info *info)

 Try not to reuse the same attribute type for different purposes,
 it will force you to duplicate the validation policy for every
 single command and things become very error prone.

I completely reworked that now so it will:
 * create a nested NL80211_ATTR_INTERFACE_LIST with nested {
   * 1..N attributes, with nested {
 * ATTR_IFINDEX and
 * ATTR_IFNAME
   }
 }

how does that sound? Maybe I should do the same for the WIPHY list? i.e.
create a new type ATTR_WIPHY_LIST and within that nest numbered
attributes (array indexes) and in there put ATTR_WIPHY? So possibly I
could also put ATTR_INTERFACE_LIST in there as well later?

johannes
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 44/44] [XFRM] IPV6: Support Mobile IPv6 extension headers sorting.

2006-08-25 Thread Masahide NAKAMURA

Masahide NAKAMURA wrote:
 David Miller wrote:
 From: Masahide NAKAMURA [EMAIL PROTECTED]
 Date: Thu, 24 Aug 2006 16:05:39 +0900

 David Miller wrote:
 In the mean time, I will work on porting my XFRM hashing changes
 for the current net-2.6.19 tree.
 FYI, your work will not have any conflict with the left of MIPv6 patches
 which I will describe later since they are almost out of XFRM.

 Great.

 I just finished the port and pushed all of that work to net-2.6.19,
 can folks please take a look?

 I tested IPSEC as best as I could with XFRM_SUB_POLICY enabled, but I
 have no way currently to test sub-policies or MIPV6 cases.

 OK, I will review it and also start my XFRM test with net-2.6.19
 as I've done with my tree.

I've found a problem about MIPv6 CN with the patch below.

commit 02b0fa84daaa70f035767c9a5a0d539667249e60
Author: David S. Miller [EMAIL PROTECTED]
Date:   Thu Aug 24 04:45:07 2006 -0700

[XFRM]: Hash policies when non-prefixed.

It seems that the policy hashing is not always used with selector protocol.
It may conflict with the MIPL daemon, though.

Let me explain the detail:
MIPv6 specification says that all mobility header(MH) must be
sent without routing header type 2(RT2) / home address option,
except [*1].
To satisfy it MIPL daemon uses some bypass policies.

For CN outbound example(ip command output):

(a)MIPL daemon adds MH bypass policy when it starts to run:

src ::/0 dst ::/0 proto 135
dir out priority 12 ptype sub

(b)After binding is accepted, it also adds route optimization
policy to send user traffic with RT2:

src 3ffe:501::100::/128 dst 3ffe:501::101::/128
dir out priority 16 ptype sub
tmpl src :: dst ::
proto route2 reqid 0 mode ro
level use

When the daemon had added both policies, we expected that
all MH packets would use (a) and everything else (b), because of the priority order.
But the kernel used (b) when the daemon sent MH from
3ffe:501::100:: to 3ffe:501::101::.

Note: such bypasses are also required for ICMPv6 error and
neighbor discovery.

(*1:
 Binding update(BU) can be sent with home address option
 and binding ack(BA) can be sent with RT2.)

Do you have any ideas?

Thanks,

--
Masahide NAKAMURA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 44/44] [XFRM] IPV6: Support Mobile IPv6 extension headers sorting.

2006-08-25 Thread David Miller

From: Masahide NAKAMURA [EMAIL PROTECTED]
Date: Fri, 25 Aug 2006 19:06:40 +0900

 I've found a problem about MIPv6 CN with the patch below.

We just need to search by priority in the inexact list, even
if we get a hit in the hash table.

The fix is trivial, please try this patch:

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 200e6e5..060f115 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -908,6 +908,7 @@ static struct xfrm_policy *xfrm_policy_l
xfrm_address_t *daddr, *saddr;
struct hlist_node *entry;
struct hlist_head *chain;
+   u32 priority = ~0U;
 
daddr = xfrm_flowi_daddr(fl, family);
saddr = xfrm_flowi_saddr(fl, family);
@@ -919,21 +920,21 @@ static struct xfrm_policy *xfrm_policy_l
ret = NULL;
hlist_for_each_entry(pol, entry, chain, bydst) {
if (xfrm_policy_match(pol, fl, type, family, dir)) {
-   xfrm_pol_hold(pol);
ret = pol;
+   priority = ret-priority;
break;
}
}
-   if (!ret) {
-   chain = xfrm_policy_inexact[dir];
-   hlist_for_each_entry(pol, entry, chain, bydst) {
-   if (xfrm_policy_match(pol, fl, type, family, dir)) {
-   xfrm_pol_hold(pol);
-   ret = pol;
-   break;
-   }
+   chain = xfrm_policy_inexact[dir];
+   hlist_for_each_entry(pol, entry, chain, bydst) {
+   if (xfrm_policy_match(pol, fl, type, family, dir) 
+   pol-priority  priority) {
+   ret = pol;
+   break;
}
}
+   if (ret)
+   xfrm_pol_hold(ret);
read_unlock_bh(xfrm_policy_lock);
 
return ret;
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[NET_SCHED]: Add mask support to fwmark classifier

2006-08-25 Thread Patrick McHardy

This patch adds support for masking the nfmark value before the lookup
in the fw classifier. Unfortunately it has some drawbacks, so I'd
be interested if anyone can think of a better way.

The problem is that in order to avoid walking through all filters
contained in one instance, we need to mask the value before the
lookup. This means all filters share the same mask, which is
taken from the first filter created and stored in the filter head.
The user interface however always refers to a single filter,
not the head, so it can't be changed afterwards unless we just
overwrite it whenever a new filter is installed. Neither option is
really perfect. The current patch doesn't allow changing the
mask and enforces that all filters use the same one, which I think
is better than allowing inconsistent configurations.

Any better ideas?

[NET_SCHED]: Add mask support to fwmark classifier

Support masking the nfmark value before the search. The mask value is
global for all filters contained in one instance. It can only be set
when a new instance is created, all filters must specify the same mask.

Signed-off-by: Patrick McHardy [EMAIL PROTECTED]

---
commit c7dff54dc2dca206ff54f66bfce290c49f98a3c8
tree 88d48096f13674f29413dc5c9853c7d0a8c5feac
parent e5d8ce21a2261f73b078d802bd2ab3508153b177
author Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 12:03:19 +0200
committer Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 12:03:19 +0200

 include/linux/pkt_cls.h |1 +
 net/sched/cls_fw.c  |   25 -
 2 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index bd2c5a2..c3f01b3 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -305,6 +305,7 @@ enum
TCA_FW_POLICE,
TCA_FW_INDEV, /*  used by CONFIG_NET_CLS_IND */
TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
+   TCA_FW_MASK,
__TCA_FW_MAX
 };
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index e6973d9..c9385dc 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -50,6 +50,7 @@ #define HTSIZE (PAGE_SIZE/sizeof(struct 
 struct fw_head
 {
struct fw_filter *ht[HTSIZE];
+   u32 mask;
 };
 
 struct fw_filter
@@ -101,7 +102,7 @@ static int fw_classify(struct sk_buff *s
struct fw_filter *f;
int r;
 #ifdef CONFIG_NETFILTER
-   u32 id = skb-nfmark;
+   u32 id = skb-nfmark  head-mask;
 #else
u32 id = 0;
 #endif
@@ -209,7 +210,9 @@ static int
 fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
struct rtattr **tb, struct rtattr **tca, unsigned long base)
 {
+   struct fw_head *head = (struct fw_head*)tp-root;
struct tcf_exts e;
+   u32 mask;
int err;
 
err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], e, fw_ext_map);
@@ -232,6 +235,15 @@ #ifdef CONFIG_NET_CLS_IND
}
 #endif /* CONFIG_NET_CLS_IND */
 
+   if (tb[TCA_FW_MASK-1]) {
+   if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
+   goto errout;
+   mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
+   if (mask != head-mask)
+   goto errout;
+   } else if (head-mask != 0x)
+   goto errout;
+
tcf_exts_change(tp, f-exts, e);
 
return 0;
@@ -267,9 +279,17 @@ static int fw_change(struct tcf_proto *t
return -EINVAL;
 
if (head == NULL) {
+   u32 mask = 0x;
+   if (tb[TCA_FW_MASK-1]) {
+   if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
+   return -EINVAL;
+   mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
+   }
+
head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
if (head == NULL)
return -ENOBUFS;
+   head-mask = mask;
 
tcf_tree_lock(tp);
tp-root = head;
@@ -330,6 +350,7 @@ static void fw_walk(struct tcf_proto *tp
 static int fw_dump(struct tcf_proto *tp, unsigned long fh,
   struct sk_buff *skb, struct tcmsg *t)
 {
+   struct fw_head *head = (struct fw_head *)tp-root;
struct fw_filter *f = (struct fw_filter*)fh;
unsigned char*b = skb-tail;
struct rtattr *rta;
@@ -351,6 +372,8 @@ #ifdef CONFIG_NET_CLS_IND
if (strlen(f-indev))
RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f-indev);
 #endif /* CONFIG_NET_CLS_IND */
+   if (head-mask != 0x)
+   RTA_PUT(skb, TCA_FW_MASK, 4, head-mask);
 
if (tcf_exts_dump(skb, f-exts, fw_ext_map)  0)
goto rtattr_failure;

Re: [RFC] add nl80211

2006-08-25 Thread Thomas Graf

* Johannes Berg [EMAIL PROTECTED] 2006-08-25 11:04
 I completely reworked that now so it will:
  * create a nested NL80211_ATTR_INTERFACE_LIST with nested {
* 1..N attributes, with nested {
  * ATTR_IFINDEX and
  * ATTR_IFNAME
}
  }
 
 how does that sound? Maybe I should do the same for the WIPHY list? i.e.
 create a new type ATTR_WIPHY_LIST and within that nest numbered
 attributes (array indexes) and in there put ATTR_WIPHY? So possibly I
 could also put ATTR_INTERFACE_LIST in there as well later?

That's exactly what I would have done as well.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC] add nl80211

2006-08-25 Thread Johannes Berg

On Fri, 2006-08-25 at 12:30 +0200, Thomas Graf wrote:

 That's exactly what I would have done as well.

Alright. Changing it, then I'll repost. Again :)

johannes
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [take13 1/3] kevent: Core files.

2006-08-25 Thread Andrew Morton

On Fri, 25 Aug 2006 09:48:15 +0400
Evgeniy Polyakov [EMAIL PROTECTED] wrote:

 kmalloc is really slow actually - it always shows somewhere on top 
 in profiles and brings noticeble overhead

It shouldn't.  Please describe the workload and send the profiles.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [take13 1/3] kevent: Core files.

2006-08-25 Thread Evgeniy Polyakov

On Thu, Aug 24, 2006 at 11:20:24PM -0700, Andrew Morton ([EMAIL PROTECTED]) 
wrote:
 On Fri, 25 Aug 2006 09:48:15 +0400
 Evgeniy Polyakov [EMAIL PROTECTED] wrote:
 
  kmalloc is really slow actually - it always shows somewhere on top 
  in profiles and brings noticeble overhead
 
 It shouldn't.  Please describe the workload and send the profiles.

An epoll-based trivial server (accept + sendfile for the same file, about
4k), httperf with a large number of simultaneous connections. 3c59x NIC
(with e1000 there were no ioreads and netif_rx).
__alloc_skb calls kmem_cache_alloc() and __kmalloc().

16158 1.3681  ioread16
8073  0.6835  ioread32
3485  0.2951  irq_entries_start
3018  0.2555  _spin_lock
2103  0.1781  tcp_v4_rcv
1503  0.1273  sysenter_past_esp
1492  0.1263  netif_rx
1459  0.1235  skb_copy_bits
1422  0.1204  _spin_lock_irqsave
1145  0.0969  ip_route_input
983   0.0832  kmem_cache_free
964   0.0816  __alloc_skb
926   0.0784  common_interrupt
891   0.0754  __do_IRQ
846   0.0716  _read_lock
826   0.0699  __netif_rx_schedule
806   0.0682  __kmalloc
767   0.0649  do_tcp_sendpages
747   0.0632  __copy_to_user_ll
744   0.0630  pskb_expand_head


-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [take13 1/3] kevent: Core files.

2006-08-25 Thread Andrew Morton

On Fri, 25 Aug 2006 10:32:38 +0400
Evgeniy Polyakov [EMAIL PROTECTED] wrote:

 On Thu, Aug 24, 2006 at 11:20:24PM -0700, Andrew Morton ([EMAIL PROTECTED]) 
 wrote:
  On Fri, 25 Aug 2006 09:48:15 +0400
  Evgeniy Polyakov [EMAIL PROTECTED] wrote:
  
   kmalloc is really slow actually - it always shows somewhere on top 
   in profiles and brings noticeble overhead
  
  It shouldn't.  Please describe the workload and send the profiles.
 
 epoll based trivial server (accept + sendfile for the same file, about
 4k), httperf with big amount of simulateneous connections. 3c59x NIC 
 (with e1000 there were no ioreads and netif_rx).
 __alloc_skb calls kmem_cache_alloc() and ___kmalloc().
 
 16158 1.3681  ioread16
 8073  0.6835  ioread32
 3485  0.2951  irq_entries_start
 3018  0.2555  _spin_lock
 2103  0.1781  tcp_v4_rcv
 1503  0.1273  sysenter_past_esp
 1492  0.1263  netif_rx
 1459  0.1235  skb_copy_bits
 1422  0.1204  _spin_lock_irqsave
 1145  0.0969  ip_route_input
 983   0.0832  kmem_cache_free
 964   0.0816  __alloc_skb
 926   0.0784  common_interrupt
 891   0.0754  __do_IRQ
 846   0.0716  _read_lock
 826   0.0699  __netif_rx_schedule
 806   0.0682  __kmalloc
 767   0.0649  do_tcp_sendpages
 747   0.0632  __copy_to_user_ll
 744   0.0630  pskb_expand_head
 

That doesn't look too bad.

What's that as a percentage of total user+system time?
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [take13 1/3] kevent: Core files.

2006-08-25 Thread David Miller

From: Andrew Morton [EMAIL PROTECTED]
Date: Thu, 24 Aug 2006 23:20:24 -0700

 On Fri, 25 Aug 2006 09:48:15 +0400
 Evgeniy Polyakov [EMAIL PROTECTED] wrote:

  kmalloc is really slow actually - it always shows somewhere on top 
  in profiles and brings noticeble overhead

 It shouldn't.  Please describe the workload and send the profiles.

Not that I can account for the problem in this specific case, in my
experience cutting down kmalloc() calls matters a _lot_ performance
wise.

For example, this is why we allocate TCP sockets as one huge blob
instead of 3 separate allocations (generic socket, IP socket, TCP
socket).

In fact, one of the remaining performance issues in IPSEC rule
creation is that we allocate separate hunks of memory for the rule's
encryption state, the optional hash algorithm state, etc.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [take13 1/3] kevent: Core files.

2006-08-25 Thread Andrew Morton

On Fri, 25 Aug 2006 00:01:06 -0700 (PDT)
David Miller [EMAIL PROTECTED] wrote:

 From: Andrew Morton [EMAIL PROTECTED]
 Date: Thu, 24 Aug 2006 23:20:24 -0700

  On Fri, 25 Aug 2006 09:48:15 +0400
  Evgeniy Polyakov [EMAIL PROTECTED] wrote:

   kmalloc is really slow actually - it always shows somewhere on top 
   in profiles and brings noticeble overhead

  It shouldn't.  Please describe the workload and send the profiles.

 Not that I can account for the problem in this specific case, in my
 experience cutting down kmalloc() calls matters a _lot_ performance
 wise.

 For example, this is why we allocate TCP sockets as one huge blob
 instead of 3 seperate allocations (generic socket, IP socket, TCP
 socket).

 In fact, one of the remaining performance issues in IPSEC rule
 creation is that we allocate seperately hunks of memory for the rule's
 encryption state, the optional hash algorithm state, etc.

Part of that will be cache sharing between the three structs though.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [take13 1/3] kevent: Core files.

2006-08-25 Thread Evgeniy Polyakov

On Thu, Aug 24, 2006 at 11:58:59PM -0700, Andrew Morton ([EMAIL PROTECTED]) 
wrote:
kmalloc is really slow actually - it always shows somewhere on top 
in profiles and brings noticeble overhead
   
   It shouldn't.  Please describe the workload and send the profiles.
  
  epoll based trivial server (accept + sendfile for the same file, about
  4k), httperf with big amount of simulateneous connections. 3c59x NIC 
  (with e1000 there were no ioreads and netif_rx).
  __alloc_skb calls kmem_cache_alloc() and ___kmalloc().
  
  16158 1.3681  ioread16
  8073  0.6835  ioread32
  3485  0.2951  irq_entries_start
  3018  0.2555  _spin_lock
  2103  0.1781  tcp_v4_rcv
  1503  0.1273  sysenter_past_esp
  1492  0.1263  netif_rx
  1459  0.1235  skb_copy_bits
  1422  0.1204  _spin_lock_irqsave
  1145  0.0969  ip_route_input
  983   0.0832  kmem_cache_free
  964   0.0816  __alloc_skb
  926   0.0784  common_interrupt
  891   0.0754  __do_IRQ
  846   0.0716  _read_lock
  826   0.0699  __netif_rx_schedule
  806   0.0682  __kmalloc
  767   0.0649  do_tcp_sendpages
  747   0.0632  __copy_to_user_ll
  744   0.0630  pskb_expand_head
  
 
 That doesn't look too bad.
 
 What's that as a percentage of total user+system time?

With e1000, allocations take more time than actual TCP processing, so it
raised some suspicion for me (especially in bulk transfer).
Total time is about 7 times more than system time; user time is much less
than system time (about 20 times less, but the test duration was not very
long, so it can vary).

I am not saying it is bad, but it is noticeable and should be eliminated
if there is no requirement to have it.

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[take14 0/3] kevent: Generic event handling mechanism.

2006-08-25 Thread Evgeniy Polyakov


Generic event handling mechanism.

Changes from 'take13' patchset:
 * do not get lock around user data check in __kevent_search()
 * fail early if there were no registered callbacks for given type of kevent
 * trailing whitespace cleanup

Changes from 'take12' patchset:
 * remove non-chardev interface for initialization
 * use pointer to kevent_mring instead of unsigned longs
 * use aligned 64bit type in raw user data (can be used by high-res timer if 
needed)
 * simplified enqueue/dequeue callbacks and kevent initialization
 * use nanoseconds for timeout
 * put number of milliseconds into timer's return data
 * move some definitions into user-visible header
 * removed filenames from comments

Changes from 'take11' patchset:
 * include missing headers into patchset
 * some trivial code cleanups (use goto instead of if/else games and so on)
 * some whitespace cleanups
 * check for ready_callback() callback before main loop which should save us 
some ticks

Changes from 'take10' patchset:
 * removed non-existent prototypes
 * added helper function for kevent_registered_callbacks
 * fixed 80 lines comments issues
 * added a header shared between userspace and kernelspace instead of embedding 
them in one
 * core restructuring to remove forward declarations
 * s o m e w h i t e s p a c e c o d y n g s t y l e c l e a n u p
 * use vm_insert_page() instead of remap_pfn_range()

Changes from 'take9' patchset:
 * fixed -nopage method

Changes from 'take8' patchset:
 * fixed mmap release bug
 * use module_init() instead of late_initcall()
 * use better structures for timer notifications

Changes from 'take7' patchset:
 * new mmap interface (not tested, waiting for other changes to be acked)
- use nopage() method to dynamically substitute pages
- allocate new page for events only when a newly added kevent requires it
- do not use ugly index dereferencing, use structure instead
- reduced amount of data in the ring (id and flags), 
maximum 12 pages on x86 per kevent fd

Changes from 'take6' patchset:
 * a lot of comments!
 * do not use list poisoning for detection of the fact, that entry is in the 
list
 * return number of ready kevents even if copy*user() fails
 * strict check for number of kevents in syscall
 * use ARRAY_SIZE for array size calculation
 * changed superblock magic number
 * use SLAB_PANIC instead of direct panic() call
 * changed -E* return values
 * a lot of small cleanups and indent fixes

Changes from 'take5' patchset:
 * removed compilation warnings about unused variables when lockdep is not 
turned on
 * do not use internal socket structures, use appropriate (exported) wrappers 
instead
 * removed default 1 second timeout
 * removed AIO stuff from patchset

Changes from 'take4' patchset:
 * use miscdevice instead of chardevice
 * comments fixes

Changes from 'take3' patchset:
 * removed serializing mutex from kevent_user_wait()
 * moved storage list processing to RCU
 * removed lockdep screaming - all storage locks are initialized in the same 
function, so it was learned 
to differentiate between various cases
 * remove kevent from storage if is marked as broken after callback
 * fixed a typo in mmaped buffer implementation which would end up in wrong 
index calculation 

Changes from 'take2' patchset:
 * split kevent_finish_user() to locked and unlocked variants
 * do not use KEVENT_STAT ifdefs, use inline functions instead
 * use array of callbacks of each type instead of each kevent callback 
initialization
 * changed name of ukevent guarding lock
 * use only one kevent lock in kevent_user for all hash buckets instead of 
per-bucket locks
 * do not use kevent_user_ctl structure instead provide needed arguments as 
syscall parameters
 * various indent cleanups
 * added optimisation, which is aimed to help when a lot of kevents are being 
copied from userspace
 * mapped buffer (initial) implementation (no userspace yet)

Changes from 'take1' patchset:
 - rebased against 2.6.18-git tree
 - removed ioctl controlling
 - added new syscall kevent_get_events(int fd, unsigned int min_nr, unsigned 
int max_nr,
unsigned int timeout, void __user *buf, unsigned flags)
 - use old syscall kevent_ctl for creation/removing, modification and initial 
kevent 
initialization
 - use mutexes instead of semaphores
 - added file descriptor check and return error if provided descriptor does not 
match
kevent file operations
 - various indent fixes
 - removed aio_sendfile() declarations.

Thank you.

Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[take14 2/3] kevent: poll/select() notifications.

2006-08-25 Thread Evgeniy Polyakov


poll/select() notifications.

This patch includes generic poll/select and timer notifications.

kevent_poll works similarly to epoll and has the same issues (the callback
is invoked not from the internal state machine of the caller, but through
process wakeup).

Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2561020..76b3039 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -236,6 +236,7 @@ #include linux/prio_tree.h
 #include linux/init.h
 #include linux/sched.h
 #include linux/mutex.h
+#include linux/kevent.h
 
 #include asm/atomic.h
 #include asm/semaphore.h
@@ -698,6 +699,9 @@ #ifdef CONFIG_EPOLL
struct list_headf_ep_links;
spinlock_t  f_ep_lock;
 #endif /* #ifdef CONFIG_EPOLL */
+#ifdef CONFIG_KEVENT_POLL
+   struct kevent_storage   st;
+#endif
struct address_space*f_mapping;
 };
 extern spinlock_t files_lock;
diff --git a/kernel/kevent/kevent_poll.c b/kernel/kevent/kevent_poll.c
new file mode 100644
index 000..fb74e0f
--- /dev/null
+++ b/kernel/kevent/kevent_poll.c
@@ -0,0 +1,222 @@
+/*
+ * 2006 Copyright (c) Evgeniy Polyakov [EMAIL PROTECTED]
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include linux/kernel.h
+#include linux/types.h
+#include linux/list.h
+#include linux/slab.h
+#include linux/spinlock.h
+#include linux/timer.h
+#include linux/file.h
+#include linux/kevent.h
+#include linux/poll.h
+#include linux/fs.h
+
+static kmem_cache_t *kevent_poll_container_cache;
+static kmem_cache_t *kevent_poll_priv_cache;
+
+struct kevent_poll_ctl
+{
+   struct poll_table_structpt;
+   struct kevent   *k;
+};
+
+struct kevent_poll_wait_container
+{
+   struct list_headcontainer_entry;
+   wait_queue_head_t   *whead;
+   wait_queue_twait;
+   struct kevent   *k;
+};
+
+struct kevent_poll_private
+{
+   struct list_headcontainer_list;
+   spinlock_t  container_lock;
+};
+
+static int kevent_poll_enqueue(struct kevent *k);
+static int kevent_poll_dequeue(struct kevent *k);
+static int kevent_poll_callback(struct kevent *k);
+
+static int kevent_poll_wait_callback(wait_queue_t *wait,
+   unsigned mode, int sync, void *key)
+{
+   struct kevent_poll_wait_container *cont =
+   container_of(wait, struct kevent_poll_wait_container, wait);
+   struct kevent *k = cont-k;
+   struct file *file = k-st-origin;
+   u32 revents;
+
+   revents = file-f_op-poll(file, NULL);
+
+   kevent_storage_ready(k-st, NULL, revents);
+
+   return 0;
+}
+
+static void kevent_poll_qproc(struct file *file, wait_queue_head_t *whead,
+   struct poll_table_struct *poll_table)
+{
+   struct kevent *k =
+   container_of(poll_table, struct kevent_poll_ctl, pt)-k;
+   struct kevent_poll_private *priv = k-priv;
+   struct kevent_poll_wait_container *cont;
+   unsigned long flags;
+
+   cont = kmem_cache_alloc(kevent_poll_container_cache, SLAB_KERNEL);
+   if (!cont) {
+   kevent_break(k);
+   return;
+   }
+
+   cont-k = k;
+   init_waitqueue_func_entry(cont-wait, kevent_poll_wait_callback);
+   cont-whead = whead;
+
+   spin_lock_irqsave(priv-container_lock, flags);
+   list_add_tail(cont-container_entry, priv-container_list);
+   spin_unlock_irqrestore(priv-container_lock, flags);
+
+   add_wait_queue(whead, cont-wait);
+}
+
+static int kevent_poll_enqueue(struct kevent *k)
+{
+   struct file *file;
+   int err, ready = 0;
+   unsigned int revents;
+   struct kevent_poll_ctl ctl;
+   struct kevent_poll_private *priv;
+
+   file = fget(k-event.id.raw[0]);
+   if (!file)
+   return -ENODEV;
+
+   err = -EINVAL;
+   if (!file-f_op || !file-f_op-poll)
+   goto err_out_fput;
+
+   err = -ENOMEM;
+   priv = kmem_cache_alloc(kevent_poll_priv_cache, SLAB_KERNEL);
+   if (!priv)
+   goto err_out_fput;
+
+   spin_lock_init(priv-container_lock);
+   INIT_LIST_HEAD(priv-container_list);
+
+   k-priv = priv;
+
+   ctl.k = k;
+   init_poll_funcptr(ctl.pt, kevent_poll_qproc);
+
+   err = kevent_storage_enqueue(file-st, k);
+   if (err)
+   goto err_out_free;
+
+   revents = file-f_op-poll(file, ctl.pt);
+   if

[take14 1/3] kevent: Core files.

2006-08-25 Thread Evgeniy Polyakov


Core files.

This patch includes core kevent files:
 - userspace controlling
 - kernelspace interfaces
 - initialization
 - notification state machines

Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]

diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index dd63d47..091ff42 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -317,3 +317,5 @@ ENTRY(sys_call_table)
.long sys_tee   /* 315 */
.long sys_vmsplice
.long sys_move_pages
+   .long sys_kevent_get_events
+   .long sys_kevent_ctl
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 5d4a7d1..b2af4a8 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -713,4 +713,6 @@ #endif
.quad sys_tee
.quad compat_sys_vmsplice
.quad compat_sys_move_pages
+   .quad sys_kevent_get_events
+   .quad sys_kevent_ctl
 ia32_syscall_end:  
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index fc1c8dd..c9dde13 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -323,10 +323,12 @@ #define __NR_sync_file_range  314
 #define __NR_tee   315
 #define __NR_vmsplice  316
 #define __NR_move_pages317
+#define __NR_kevent_get_events 318
+#define __NR_kevent_ctl319
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 318
+#define NR_syscalls 320
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 94387c9..61363e0 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -619,10 +619,14 @@ #define __NR_vmsplice 278
 __SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_move_pages279
 __SYSCALL(__NR_move_pages, sys_move_pages)
+#define __NR_kevent_get_events 280
+__SYSCALL(__NR_kevent_get_events, sys_kevent_get_events)
+#define __NR_kevent_ctl281
+__SYSCALL(__NR_kevent_ctl, sys_kevent_ctl)
 
 #ifdef __KERNEL__
 
-#define __NR_syscall_max __NR_move_pages
+#define __NR_syscall_max __NR_kevent_ctl
 
 #ifndef __NO_STUBS
 
diff --git a/include/linux/kevent.h b/include/linux/kevent.h
new file mode 100644
index 000..de33ec7
--- /dev/null
+++ b/include/linux/kevent.h
@@ -0,0 +1,173 @@
+/*
+ * 2006 Copyright (c) Evgeniy Polyakov [EMAIL PROTECTED]
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __KEVENT_H
+#define __KEVENT_H
+#include linux/types.h
+#include linux/list.h
+#include linux/spinlock.h
+#include linux/mutex.h
+#include linux/wait.h
+#include linux/net.h
+#include linux/rcupdate.h
+#include linux/kevent_storage.h
+#include linux/ukevent.h
+
+#define KEVENT_MIN_BUFFS_ALLOC 3
+
+struct kevent;
+struct kevent_storage;
+typedef int (* kevent_callback_t)(struct kevent *);
+
+/* @callback is called each time new event has been caught. */
+/* @enqueue is called each time new event is queued. */
+/* @dequeue is called each time event is dequeued. */
+
+struct kevent_callbacks {
+   kevent_callback_t   callback, enqueue, dequeue;
+};
+
+#define KEVENT_READY   0x1
+#define KEVENT_STORAGE 0x2
+#define KEVENT_USER0x4
+
+struct kevent
+{
+   /* Used for kevent freeing.*/
+   struct rcu_head rcu_head;
+   struct ukevent  event;
+   /* This lock protects ukevent manipulations, e.g. ret_flags changes. */
+   spinlock_t  ulock;
+
+   /* Entry of user's queue. */
+   struct list_headkevent_entry;
+   /* Entry of origin's queue. */
+   struct list_headstorage_entry;
+   /* Entry of user's ready. */
+   struct list_headready_entry;
+
+   u32 flags;
+
+   /* User who requested this kevent. */
+   struct kevent_user  *user;
+   /* Kevent container. */
+   struct kevent_storage   *st;
+
+   struct kevent_callbacks callbacks;
+
+   /* Private data for different storages.
+* poll()/select storage has a list of wait_queue_t containers
+* for each -poll() { poll_wait()' } here.
+*/
+   void*priv;
+};
+
+#define

[take14 3/3] kevent: Timer notifications.

2006-08-25 Thread Evgeniy Polyakov


Timer notifications.

Timer notifications can be used for fine grained per-process time 
management, since interval timers are very inconvenient to use, 
and they are limited.

Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]

diff --git a/kernel/kevent/kevent_timer.c b/kernel/kevent/kevent_timer.c
new file mode 100644
index 000..b2fee61
--- /dev/null
+++ b/kernel/kevent/kevent_timer.c
@@ -0,0 +1,105 @@
+/*
+ * 2006 Copyright (c) Evgeniy Polyakov [EMAIL PROTECTED]
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include linux/kernel.h
+#include linux/types.h
+#include linux/list.h
+#include linux/slab.h
+#include linux/spinlock.h
+#include linux/timer.h
+#include linux/jiffies.h
+#include linux/kevent.h
+
+struct kevent_timer
+{
+   struct timer_list   ktimer;
+   struct kevent_storage   ktimer_storage;
+};
+
+static void kevent_timer_func(unsigned long data)
+{
+   struct kevent *k = (struct kevent *)data;
+   struct timer_list *t = k-st-origin;
+
+   kevent_storage_ready(k-st, NULL, KEVENT_MASK_ALL);
+   mod_timer(t, jiffies + msecs_to_jiffies(k-event.id.raw[0]));
+}
+
+static struct lock_class_key kevent_timer_key;
+
+static int kevent_timer_enqueue(struct kevent *k)
+{
+   int err;
+   struct kevent_timer *t;
+
+   t = kmalloc(sizeof(struct kevent_timer), GFP_KERNEL);
+   if (!t)
+   return -ENOMEM;
+
+   setup_timer(t-ktimer, kevent_timer_func, (unsigned long)k);
+
+   err = kevent_storage_init(t-ktimer, t-ktimer_storage);
+   if (err)
+   goto err_out_free;
+   lockdep_set_class(t-ktimer_storage.lock, kevent_timer_key);
+
+   err = kevent_storage_enqueue(t-ktimer_storage, k);
+   if (err)
+   goto err_out_st_fini;
+
+   mod_timer(t-ktimer, jiffies + msecs_to_jiffies(k-event.id.raw[0]));
+
+   return 0;
+
+err_out_st_fini:
+   kevent_storage_fini(t-ktimer_storage);
+err_out_free:
+   kfree(t);
+
+   return err;
+}
+
+static int kevent_timer_dequeue(struct kevent *k)
+{
+   struct kevent_storage *st = k-st;
+   struct kevent_timer *t = container_of(st, struct kevent_timer, 
ktimer_storage);
+
+   del_timer_sync(t-ktimer);
+   kevent_storage_dequeue(st, k);
+   kfree(t);
+
+   return 0;
+}
+
+static int kevent_timer_callback(struct kevent *k)
+{
+   k-event.ret_data[0] = jiffies_to_msecs(jiffies);
+   return 1;
+}
+
+static int __init kevent_init_timer(void)
+{
+   struct kevent_callbacks tc = {
+   .callback = kevent_timer_callback,
+   .enqueue = kevent_timer_enqueue,
+   .dequeue = kevent_timer_dequeue};
+
+   return kevent_add_callbacks(tc, KEVENT_TIMER);
+}
+module_init(kevent_init_timer);

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][PATCH 2/9] deadlock prevention core

2006-08-25 Thread Pavel Machek

Hi!

  - We expect that the lots-of-dirty-anon-memory-over-swap-over-network
scenario might still cause deadlocks.  
  
I assert that this can be solved by putting swap on local disks.  Peter
asserts that this isn't acceptable due to disk unreliability.  I point
out that local disk reliability can be increased via MD, all goes quiet.
 
 Putting swap on local disks really messes up the concept of stateless 
 servers. I suppose you can do some sort of swap encryption, but
 otherwise you need to scrub the swap partition on boot if you
 re-purpose the hardware. You also then need to do hardware
 configuration to make sure the local disks are all setup the 
 same way across all server platforms so the common images can 
 boot. 

We should really encrypt swap with random key generated at boot, for
all the machine. I believe it is possible (with some non-trivial
setup) today, but it would be nice to do it automagically.
Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NET_SCHED]: Add mask support to fwmark classifier

2006-08-25 Thread Thomas Graf

* Patrick McHardy [EMAIL PROTECTED] 2006-08-25 12:29
 This patch adds support to mask the nfmark value before the lookup
 the the fw classifier. Unfortunately it has some drawbacks, so I'd
 be interested if anyone can think of a better way.
 
 The problem is that in order to avoid walking through all filters
 contained in one instance, we need to mask the value before the
 lookup. This means all filters share the same mask, which is
 taken from the first filter created and stored in the filter head.
 The user interface however always refers to a single filter,
 not the head, so it can't be changed afterwards unless we just
 overwrite it whenever a new filter is installed. Both is not
 really perfect. The current patch doesn't allow to change the
 mark and enforces that all filters use the same one, which I think
 is better than allowing inconsistent configurations.

The other option gets down to replacing the hash table with a
list and that's not an option in my opinion. This looks very
good to me.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] remove third bogus argument from NLA_PUT_FLAG

2006-08-25 Thread Johannes Berg

This patch removes the 'value' argument from NLA_PUT_FLAG which is
unused anyway. The documentation comment was already correct so it
doesn't need an update :)

Signed-off-by: Johannes Berg [EMAIL PROTECTED]

--- wireless-dev.orig/include/net/netlink.h 2006-08-25 12:46:30.0 
+0200
+++ wireless-dev/include/net/netlink.h  2006-08-25 12:46:38.0 +0200
@@ -758,7 +758,7 @@ static inline int nla_put_msecs(struct s
 #define NLA_PUT_STRING(skb, attrtype, value) \
NLA_PUT(skb, attrtype, strlen(value) + 1, value)
 
-#define NLA_PUT_FLAG(skb, attrtype, value) \
+#define NLA_PUT_FLAG(skb, attrtype) \
NLA_PUT(skb, attrtype, 0, NULL)
 
 #define NLA_PUT_MSECS(skb, attrtype, jiffies) \

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC take3] add nl80211

2006-08-25 Thread Johannes Berg

This patch adds nl80211, a netlink based configuration
system for wireless hardware.

It currently features a few helper commands and commands to
add and remove virtual interfaces and to inject packets.
Support for nl80211 in d80211 is in a follow-up patch.

There should be support for notifications, but we need to figure
out if we remove the sysfs based add/remove virtual interface
thing completely or allow the driver to create a notification
through some new API here.

It requires the patches in
http://marc.theaimsgroup.com/?l=linux-netdev&m=115625436628696&w=2
and
http://marc.theaimsgroup.com/?l=linux-netdev&m=115625168405439&w=2

(the latter doesn't apply cleanly against wireless-dev, but you can
safely ignore the pieces that don't, at least for wireless testing :) )

It also requires the NLA_PUT_FLAG patch I did:
http://marc.theaimsgroup.com/?l=linux-netdev&m=115650333420169&w=2

Signed-off-by: Johannes Berg [EMAIL PROTECTED]

--- /dev/null   1970-01-01 00:00:00.0 +
+++ wireless-dev/include/net/nl80211.h  2006-08-25 12:51:14.0 +0200
@@ -0,0 +1,83 @@
+#ifndef __NET_NL80211_H
+#define __NET_NL80211_H
+
+#include linux/netlink.h
+#include linux/nl80211.h
+#include linux/skbuff.h
+#include linux/netdevice.h
+#include net/genetlink.h
+
+/*
+ * 802.11 netlink in-kernel interface
+ *
+ * Copyright 2006 Johannes Berg [EMAIL PROTECTED]
+ */
+
+/**
+ * struct nl80211_ops - backend description for wireless configuration
+ *
+ * This struct is registered by fullmac card drivers and/or wireless stacks
+ * in order to handle configuration requests on their interfaces.
+ *
+ * The priv pointer passed to each call is the pointer that was
+ * registered in nl80211_register().
+ *
+ * All callbacks except where otherwise noted should return 0
+ * on success or a negative error code.
+ *
+ * @list_interfaces: for each interfaces belonging to the wiphy identified
+ *  by the priv pointer, call the one() function with the
+ *  given data and the ifindex. This callback is required.
+ *
+ * @inject_packet: inject the given frame with the NL80211_FLAG_*
+ *flags onto the given queue.
+ *
+ * @add_virtual_intf: create a new virtual interface with the given name
+ *
+ * @del_virtual_intf: remove the virtual interface determined by ifindex.
+ */
+struct nl80211_ops {
+   int (*list_interfaces)(void *priv, void *data,
+  int (*one)(void *data, int ifindex));
+   int (*inject_packet)(void *priv, void *frame, int framelen,
+u32 flags, int queue);
+
+   int (*add_virtual_intf)(void *priv, char *name,
+   unsigned int type);
+   int (*del_virtual_intf)(void *priv, int ifindex);
+
+   /* more things to be added...
+*
+* for a (*configure)(...) call I'd probably guess that the
+* best bet would be to have one call that returns all
+* possible options, one that sets them based on the
+* struct genl_info *info, and one for that optimised
+* set-at-once thing.
+*/
+};
+
+/*
+ * register a given method structure with the nl80211 system
+ * and associate the 'priv' pointer with it.
+ *
+ * Returns a positive wiphy index or a negative error code.
+ *
+ * NOTE: for proper operation, this priv pointer MUST also be
+ * assigned to each struct net_device's @ieee80211_ptr member!
+ */
+extern int nl80211_register(struct nl80211_ops *ops, void *priv);
+/*
+ * unregister a device with the given priv pointer.
+ * After this call, no more requests can be made with this priv
+ * pointer, but the call may sleep to wait for an outstanding
+ * request that is being handled.
+ */
+extern void nl80211_unregister(void *priv);
+
+/* helper functions */
+extern void *nl80211hdr_put(struct sk_buff *skb, u32 pid,
+   u32 seq, int flags, u8 cmd);
+extern void *nl80211msg_new(struct sk_buff **skb, u32 pid,
+   u32 seq, int flags, u8 cmd);
+
+#endif /* __NET_NL80211_H */
--- wireless-dev.orig/net/Kconfig   2006-08-25 12:51:09.0 +0200
+++ wireless-dev/net/Kconfig2006-08-25 12:51:14.0 +0200
@@ -250,6 +250,9 @@ source net/ieee80211/Kconfig
 config WIRELESS_EXT
bool
 
+config NETLINK_80211
+   tristate
+
 endif   # if NET
 endmenu # Networking
 
--- wireless-dev.orig/net/Makefile  2006-08-25 12:51:09.0 +0200
+++ wireless-dev/net/Makefile   2006-08-25 12:51:14.0 +0200
@@ -44,6 +44,7 @@ obj-$(CONFIG_ECONET)  += econet/
 obj-$(CONFIG_VLAN_8021Q)   += 8021q/
 obj-$(CONFIG_IP_DCCP)  += dccp/
 obj-$(CONFIG_IP_SCTP)  += sctp/
+obj-$(CONFIG_NETLINK_80211)+= wireless/
 obj-$(CONFIG_D80211)   += d80211/
 obj-$(CONFIG_IEEE80211)+= ieee80211/
 obj-$(CONFIG_TIPC) += tipc/
--- /dev/null   1970-01-01 00:00:00.0 +
+++

[RFC take3] make d80211 use nl80211

2006-08-25 Thread Johannes Berg

This patch makes d80211 partially configurable using the
infrastructure that nl80211 provides. So far, it allows
packet injection and adding/removing virtual interfaces.

Signed-off-by: Johannes Berg [EMAIL PROTECTED]

--- wireless-dev.orig/net/d80211/Kconfig2006-08-25 11:31:01.0 
+0200
+++ wireless-dev/net/d80211/Kconfig 2006-08-25 11:32:38.0 +0200
@@ -3,6 +3,7 @@ config D80211
select CRYPTO
select CRYPTO_ARC4
select CRYPTO_AES
+   select NETLINK_80211
---help---
This option enables the hardware independent IEEE 802.11
networking stack.
--- wireless-dev.orig/net/d80211/Makefile   2006-08-25 11:31:01.0 
+0200
+++ wireless-dev/net/d80211/Makefile2006-08-25 11:32:38.0 +0200
@@ -8,6 +8,7 @@ obj-$(CONFIG_D80211) += 80211.o rate_con
sta_info.o \
wep.o \
wpa.o \
+   ieee80211_cfg.o \
ieee80211_scan.o \
ieee80211_sta.o \
ieee80211_dev.o \
--- wireless-dev.orig/net/d80211/ieee80211.c2006-08-25 11:31:01.0 
+0200
+++ wireless-dev/net/d80211/ieee80211.c 2006-08-25 11:32:38.0 +0200
@@ -20,6 +20,7 @@
 #include net/iw_handler.h
 #include linux/compiler.h
 #include linux/bitmap.h
+#include linux/nl80211.h
 
 #include net/d80211.h
 #include net/d80211_common.h
@@ -32,6 +33,7 @@
 #include wme.h
 #include aes_ccm.h
 #include ieee80211_led.h
+#include ieee80211_cfg.h
 
 /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
 /* Ethernet-II snap header (RFC1042 for most EtherTypes) */
@@ -354,6 +356,16 @@ ieee80211_tx_h_rate_ctrl(struct ieee8021
 {
struct rate_control_extra extra;
 
+   /* FIXME
+   if (tx-dev == tx-local-mdev 
+   (inject rate set)) {
+   a
+   tx-u.tx.rate = ...
+   etc etc
+   return TXRX_CONTINUE;
+   }
+   */
+
memset(extra, 0, sizeof(extra));
extra.mgmt_data = tx-sdata 
tx-sdata-type == IEEE80211_IF_TYPE_MGMT;
@@ -759,6 +771,13 @@ ieee80211_tx_h_misc(struct ieee80211_txr
u16 dur;
struct ieee80211_tx_control *control = tx-u.tx.control;
 
+   /* FIXME
+   if (tx-dev == tx-local-mdev) {
+   set up retry limit, ...
+   based on injection parameters
+   }
+   */
+
if (!is_multicast_ether_addr(hdr-addr1)) {
if (tx-skb-len + FCS_LEN  tx-local-rts_threshold 
tx-local-rts_threshold  IEEE80211_MAX_RTS_THRESHOLD) {
@@ -884,6 +903,9 @@ ieee80211_tx_h_check_assoc(struct ieee80
 #endif /* CONFIG_D80211_VERBOSE_DEBUG */
u32 sta_flags;
 
+   if (unlikely(tx-dev == tx-local-mdev))
+   return TXRX_CONTINUE;
+
if (unlikely(tx-local-sta_scanning != 0) 
((tx-fc  IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT ||
 (tx-fc  IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ))
@@ -987,6 +1009,12 @@ static void purge_old_ps_buffers(struct 
 static inline ieee80211_txrx_result
 ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx)
 {
+   /* FIXME
+   if (unlikely(tx-dev == tx-local-mdev 
+   (inject flags)  NL80211_FLAG_NOBUFFER))
+   return TXRX_CONTINUE;
+   */
+
/* broadcast/multicast frame */
/* If any of the associated stations is in power save mode,
 * the frame is buffered to be sent after DTIM beacon frame */
@@ -1414,11 +1442,12 @@ static int ieee80211_master_start_xmit(s
 
control.ifindex = odev-ifindex;
control.type = osdata-type;
-   control.req_tx_status = pkt_data-req_tx_status;
-   control.do_not_encrypt = pkt_data-do_not_encrypt;
+   control.req_tx_status = !!(pkt_data-flags  NL80211_FLAG_TXSTATUS);
+   control.do_not_encrypt = !(pkt_data-flags  NL80211_FLAG_ENCRYPT);
control.pkt_type =
-   pkt_data-pkt_probe_resp ? PKT_PROBE_RESP : PKT_NORMAL;
-   control.requeue = pkt_data-requeue;
+   (pkt_data-internal_flags  TX_FLAG_PROBERESP) ?
+   PKT_PROBE_RESP : PKT_NORMAL;
+   control.requeue = !!(pkt_data-internal_flags  TX_FLAG_REQUEUE);
control.queue = pkt_data-queue;
 
ret = ieee80211_tx(odev, skb, control,
@@ -1594,8 +1623,10 @@ static int ieee80211_subif_start_xmit(st
pkt_data = (struct ieee80211_tx_packet_data *)skb-cb;
memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data));
pkt_data-ifindex = sdata-dev-ifindex;
-   pkt_data-mgmt_iface = (sdata-type == IEEE80211_IF_TYPE_MGMT);
-   pkt_data-do_not_encrypt = no_encrypt;
+   if (sdata-type == IEEE80211_IF_TYPE_MGMT)
+   pkt_data-internal_flags |= TX_FLAG_INJECTED;
+   if (!no_encrypt)
+   pkt_data-flags |= NL80211_FLAG_ENCRYPT;
 
skb-dev = sdata-master;
sdata-stats.tx_packets++;
@@ -1646,11 +1677,12 @@ ieee80211_mgmt_start_xmit(struct sk_buff
pkt_data = (struct

Re: [NET_SCHED]: Add mask support to fwmark classifier

2006-08-25 Thread Patrick McHardy

Thomas Graf wrote:
 * Patrick McHardy [EMAIL PROTECTED] 2006-08-25 12:29
 
The problem is that in order to avoid walking through all filters
contained in one instance, we need to mask the value before the
lookup. This means all filters share the same mask, which is
taken from the first filter created and stored in the filter head.
The user interface however always refers to a single filter,
not the head, so it can't be changed afterwards unless we just
overwrite it whenever a new filter is installed. Both is not
really perfect. The current patch doesn't allow to change the
mark and enforces that all filters use the same one, which I think
is better than allowing inconsistent configurations.
 
 
 The other option gets down to replacing the hash table with a
 list and that's not an option in my opinion. This looks very
 good to me.


Great, thanks. I'll send it off to Dave with two similar patches
for IPv4 and DecNET routing rules.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[DECNET]: Add support for fwmark masks in routing rules

2006-08-25 Thread Patrick McHardy

[DECNET]: Add support for fwmark masks in routing rules

Add support for fwmark masks. For compatibility a mask of 0xFFFFFFFF is used
when a mark value != 0 is sent without a mask.

Signed-off-by: Patrick McHardy [EMAIL PROTECTED]

---
commit bcd4f6996453aaf0a8d5515dcc533115621c961f
tree 62909d3d2c6edd4f236284b86c4c422cb40bc489
parent 9037bbabed75d822002be78047f518d42f225a00
author Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 14:00:12 +0200
committer Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 14:00:12 +0200

 net/decnet/dn_rules.c |   20 ++--
 1 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 50e819e..63ad63d 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -47,6 +47,7 @@ struct dn_fib_rule
u8  flags;
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
u32 fwmark;
+   u32 fwmask;
 #endif
 };
 
@@ -116,6 +117,7 @@ static struct nla_policy dn_fib_rule_pol
[FRA_SRC]   = { .type = NLA_U16 },
[FRA_DST]   = { .type = NLA_U16 },
[FRA_FWMARK]= { .type = NLA_U32 },
+   [FRA_FWMASK]= { .type = NLA_U32 },
[FRA_TABLE] = { .type = NLA_U32 },
 };
 
@@ -130,7 +132,7 @@ static int dn_fib_rule_match(struct fib_
return 0;
 
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-   if (r-fwmark  (r-fwmark != fl-fld_fwmark))
+   if ((r-fwmark ^ fl-fld_fwmark)  r-fwmask)
return 0;
 #endif
 
@@ -168,8 +170,17 @@ static int dn_fib_rule_configure(struct 
r-dst = nla_get_u16(tb[FRA_DST]);
 
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
-   if (tb[FRA_FWMARK])
+   if (tb[FRA_FWMARK]) {
r-fwmark = nla_get_u32(tb[FRA_FWMARK]);
+   if (r-fwmark)
+   /* compatibility: if the mark value is non-zero all bits
+* are compared unless a mask is explicitly specified.
+*/
+   r-fwmask = 0x;
+   }
+
+   if (tb[FRA_FWMASK])
+   r-fwmask = nla_get_u32(tb[FRA_FWMASK]);
 #endif
 
r-src_len = frh-src_len;
@@ -195,6 +206,9 @@ static int dn_fib_rule_compare(struct fi
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
if (tb[FRA_FWMARK]  (r-fwmark != nla_get_u32(tb[FRA_FWMARK])))
return 0;
+
+   if (tb[FRA_FWMASK]  (r-fwmask != nla_get_u32(tb[FRA_FWMASK])))
+   return 0;
 #endif
 
if (tb[FRA_SRC]  (r-src != nla_get_u16(tb[FRA_SRC])))
@@ -237,6 +251,8 @@ static int dn_fib_rule_fill(struct fib_r
 #ifdef CONFIG_DECNET_ROUTE_FWMARK
if (r-fwmark)
NLA_PUT_U32(skb, FRA_FWMARK, r-fwmark);
+   if (r-fwmask || r-fwmark)
+   NLA_PUT_U32(skb, FRA_FWMASK, r-fwmask);
 #endif
if (r-dst_len)
NLA_PUT_U16(skb, FRA_DST, r-dst);

[NET_SCHED]: Add mask support to fwmark classifier

2006-08-25 Thread Patrick McHardy

[NET_SCHED]: Add mask support to fwmark classifier

Support masking the nfmark value before the search. The mask value is
global for all filters contained in one instance. It can only be set
when a new instance is created, all filters must specify the same mask.

Signed-off-by: Patrick McHardy [EMAIL PROTECTED]

---
commit 734b411074d5cdb6cf1d85c7460f63730fe958f6
tree 4324105ebc0a46250cc564ecbfa3f11b8dba4369
parent bcd4f6996453aaf0a8d5515dcc533115621c961f
author Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 14:01:20 +0200
committer Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 14:01:20 +0200

 include/linux/pkt_cls.h |1 +
 net/sched/cls_fw.c  |   25 -
 2 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index bd2c5a2..c3f01b3 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -305,6 +305,7 @@ enum
TCA_FW_POLICE,
TCA_FW_INDEV, /*  used by CONFIG_NET_CLS_IND */
TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
+   TCA_FW_MASK,
__TCA_FW_MAX
 };
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index e6973d9..e54acc6 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -50,6 +50,7 @@ #define HTSIZE (PAGE_SIZE/sizeof(struct 
 struct fw_head
 {
struct fw_filter *ht[HTSIZE];
+   u32 mask;
 };
 
 struct fw_filter
@@ -101,7 +102,7 @@ static int fw_classify(struct sk_buff *s
struct fw_filter *f;
int r;
 #ifdef CONFIG_NETFILTER
-   u32 id = skb-nfmark;
+   u32 id = skb-nfmark  head-mask;
 #else
u32 id = 0;
 #endif
@@ -209,7 +210,9 @@ static int
 fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
struct rtattr **tb, struct rtattr **tca, unsigned long base)
 {
+   struct fw_head *head = (struct fw_head *)tp-root;
struct tcf_exts e;
+   u32 mask;
int err;
 
err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], e, fw_ext_map);
@@ -232,6 +235,15 @@ #ifdef CONFIG_NET_CLS_IND
}
 #endif /* CONFIG_NET_CLS_IND */
 
+   if (tb[TCA_FW_MASK-1]) {
+   if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
+   goto errout;
+   mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
+   if (mask != head-mask)
+   goto errout;
+   } else if (head-mask != 0x)
+   goto errout;
+
tcf_exts_change(tp, f-exts, e);
 
return 0;
@@ -267,9 +279,17 @@ static int fw_change(struct tcf_proto *t
return -EINVAL;
 
if (head == NULL) {
+   u32 mask = 0x;
+   if (tb[TCA_FW_MASK-1]) {
+   if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32))
+   return -EINVAL;
+   mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]);
+   }
+
head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
if (head == NULL)
return -ENOBUFS;
+   head-mask = mask;
 
tcf_tree_lock(tp);
tp-root = head;
@@ -330,6 +350,7 @@ static void fw_walk(struct tcf_proto *tp
 static int fw_dump(struct tcf_proto *tp, unsigned long fh,
   struct sk_buff *skb, struct tcmsg *t)
 {
+   struct fw_head *head = (struct fw_head *)tp-root;
struct fw_filter *f = (struct fw_filter*)fh;
unsigned char*b = skb-tail;
struct rtattr *rta;
@@ -351,6 +372,8 @@ #ifdef CONFIG_NET_CLS_IND
if (strlen(f-indev))
RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f-indev);
 #endif /* CONFIG_NET_CLS_IND */
+   if (head-mask != 0x)
+   RTA_PUT(skb, TCA_FW_MASK, 4, head-mask);
 
if (tcf_exts_dump(skb, f-exts, fw_ext_map)  0)
goto rtattr_failure;

[IPV4]: Add support for fwmark masks in routing rules

2006-08-25 Thread Patrick McHardy

Hi Dave,

these three patches add support for masking the nfmark value
in a few spots where it would be useful in an attempt to make
life easier for users using it for multiple unrelated things.

[IPV4]: Add support for fwmark masks in routing rules

Add a FRA_FWMASK attribute for fwmark masks. For compatibility a mask of
0xFFFFFFFF is used when a mark value != 0 is sent without a mask.

Signed-off-by: Patrick McHardy [EMAIL PROTECTED]

---
commit 9037bbabed75d822002be78047f518d42f225a00
tree 2ccc07b7c4d7f20b2b8722ed935908595c197803
parent e6d442e62c126e11b3199ca1bddeb7534a7cb15e
author Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 13:59:10 +0200
committer Patrick McHardy [EMAIL PROTECTED] Fri, 25 Aug 2006 13:59:10 +0200

 include/linux/fib_rules.h |3 ++-
 net/ipv4/fib_rules.c  |   21 +++--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 19a82b6..4418c8d 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -34,12 +34,13 @@ enum
FRA_UNUSED3,
FRA_UNUSED4,
FRA_UNUSED5,
-   FRA_FWMARK, /* netfilter mark (IPv4) */
+   FRA_FWMARK, /* netfilter mark */
FRA_FLOW,   /* flow/class id */
FRA_UNUSED6,
FRA_UNUSED7,
FRA_UNUSED8,
FRA_TABLE,  /* Extended table id */
+   FRA_FWMASK, /* mask for netfilter mark */
__FRA_MAX
 };
 
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index ce185ac..280f424 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -46,6 +46,7 @@ struct fib4_rule
u32 dstmask;
 #ifdef CONFIG_IP_ROUTE_FWMARK
u32 fwmark;
+   u32 fwmask;
 #endif
 #ifdef CONFIG_NET_CLS_ROUTE
u32 tclassid;
@@ -160,7 +161,7 @@ static int fib4_rule_match(struct fib_ru
return 0;
 
 #ifdef CONFIG_IP_ROUTE_FWMARK
-   if (r-fwmark  (r-fwmark != fl-fl4_fwmark))
+   if ((r-fwmark ^ fl-fl4_fwmark)  r-fwmask)
return 0;
 #endif
 
@@ -183,6 +184,7 @@ static struct nla_policy fib4_rule_polic
[FRA_SRC]   = { .type = NLA_U32 },
[FRA_DST]   = { .type = NLA_U32 },
[FRA_FWMARK]= { .type = NLA_U32 },
+   [FRA_FWMASK]= { .type = NLA_U32 },
[FRA_FLOW]  = { .type = NLA_U32 },
[FRA_TABLE] = { .type = NLA_U32 },
 };
@@ -219,8 +221,17 @@ static int fib4_rule_configure(struct fi
rule4-dst = nla_get_u32(tb[FRA_DST]);
 
 #ifdef CONFIG_IP_ROUTE_FWMARK
-   if (tb[FRA_FWMARK])
+   if (tb[FRA_FWMARK]) {
rule4-fwmark = nla_get_u32(tb[FRA_FWMARK]);
+   if (rule4-fwmark)
+   /* compatibility: if the mark value is non-zero all bits
+* are compared unless a mask is explicitly specified.
+*/
+   rule4-fwmask = 0x;
+   }
+
+   if (tb[FRA_FWMASK])
+   rule4-fwmask = nla_get_u32(tb[FRA_FWMASK]);
 #endif
 
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -256,6 +267,9 @@ static int fib4_rule_compare(struct fib_
 #ifdef CONFIG_IP_ROUTE_FWMARK
if (tb[FRA_FWMARK]  (rule4-fwmark != nla_get_u32(tb[FRA_FWMARK])))
return 0;
+
+   if (tb[FRA_FWMASK]  (rule4-fwmask != nla_get_u32(tb[FRA_FWMASK])))
+   return 0;
 #endif
 
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -285,6 +299,9 @@ static int fib4_rule_fill(struct fib_rul
 #ifdef CONFIG_IP_ROUTE_FWMARK
if (rule4-fwmark)
NLA_PUT_U32(skb, FRA_FWMARK, rule4-fwmark);
+
+   if (rule4-fwmask || rule4-fwmark)
+   NLA_PUT_U32(skb, FRA_FWMASK, rule4-fwmask);
 #endif
 
if (rule4-dst_len)

Re: [DECNET]: Add support for fwmark masks in routing rules

2006-08-25 Thread Steven Whitehouse

Hi,

On Fri, Aug 25, 2006 at 02:14:12PM +0200, Patrick McHardy wrote:

 [DECNET]: Add support for fwmark masks in routing rules
 
 Add support for fwmark masks. For compatibility a mask of 0xFFFFFFFF is used
 when a mark value != 0 is sent without a mask.
 
 Signed-off-by: Patrick McHardy [EMAIL PROTECTED]
Acked-by: Steven Whitehouse [EMAIL PROTECTED]

Looks good,

Steve.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NET_SCHED]: Add mask support to fwmark classifier

2006-08-25 Thread jamal

On Fri, 2006-25-08 at 14:02 +0200, Patrick McHardy wrote:
 Thomas Graf wrote:
  * Patrick McHardy [EMAIL PROTECTED] 2006-08-25 12:29
  
 The problem is that in order to avoid walking through all filters
 contained in one instance, we need to mask the value before the
 lookup. This means all filters share the same mask, which is
 taken from the first filter created and stored in the filter head.
 The user interface however always refers to a single filter,
 not the head, so it can't be changed afterwards unless we just
 overwrite it whenever a new filter is installed. Both is not
 really perfect. The current patch doesn't allow to change the
 mark and enforces that all filters use the same one, which I think
 is better than allowing inconsistent configurations.
  
  
  The other option gets down to replacing the hash table with a
  list and that's not an option in my opinion. This looks very
  good to me.
 
 
 Great, thanks. I'll send it off to Dave with two similar patches
 for IPv4 and DecNET routing rules.

ACKed by me as well.

cheers,
jamal

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

IBM eHEA Device Driver upstream inclusion

2006-08-25 Thread Jan-Bernd Themann

Hi Jeff,

the IBM eHEA Device Driver has been discussed on the netdev, linux-ppc and 
kernel mailing list for some time. The latest patch set we posted can be
found at:

http://www.spinics.net/lists/netdev/msg12820.html

As the discussion seems to have settled, please consider our driver for
upstream inclusion.

Thanks,

Jan-Bernd Themann  Christoph Raisch

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2.6.17 0/9] NetXen: 1G/10G Ethernet Driver - patch for big-endian systems

2006-08-25 Thread Amit S. Kale

Wendy, Michael, Ueimor,

Thanks for the patch and feedback. We'll integrate these into our driver and 
post an update asap.

-Amit

On Friday 25 August 2006 03:10, Francois Romieu wrote:
 wen xiong [EMAIL PROTECTED] :
 [...]

  diff -Nuar old/drivers/net/netxen/netxen_nic_hw.c
  new/drivers/net/netxen/netxen_nic_hw.c ---
  old/drivers/net/netxen/netxen_nic_hw.c  2006-08-23 12:58:43.0
  -0500 +++ new/drivers/net/netxen/netxen_nic_hw.c2006-08-23
  13:15:19.0 -0500 @@ -313,7 +313,8 @@
  }
  }
  CMD_DESC_TCP_HDR_OFFSET_WRT(desc, skb-h.raw - skb-data);
  -   desc-ip_hdr_offset = skb-nh.raw - skb-data;
  +   desc-length_tcp_hdr=cpu_to_le32(desc-length_tcp_hdr);

 s/=/ = /

 (several occurrences)

 [...]

  diff -Nuar old/drivers/net/netxen/netxen_nic_init.c
  new/drivers/net/netxen/netxen_nic_init.c ---
  old/drivers/net/netxen/netxen_nic_init.c2006-08-23 12:58:43.0
  -0500 +++ new/drivers/net/netxen/netxen_nic_init.c  2006-08-23
  13:15:19.0 -0500 @@ -494,7 +494,7 @@
  desc_head = recv_ctx-rcv_status_desc_head;
  desc = desc_head[consumer];
 
  -   if ((desc-owner  STATUS_OWNER_HOST))
  +   if (((le16_to_cpu(desc-owner))  STATUS_OWNER_HOST))

 Would it make a difference to swab the constant part, i.e.:

   if (desc-owner  cpu_to_le16(STATUS_OWNER_HOST))
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2.6.17 0/9] NetXen: 1G/10G Ethernet Driver

2006-08-25 Thread Amit S. Kale

Hi Don,

Thanks. We'll lindent the sources and post an update asap.
-Amit

On Thursday 24 August 2006 05:34, Don Fry wrote:
 It looks like you have not run the source through Lindent as previously
 requested.  Before you submit the code again, please use the Lindent
 script.

 I can get the code to ping between two cards.  Will be doing more
 testing tomorrow.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 0/3] secid reconciliation-v01: Repost patchset with up dates

2006-08-25 Thread Venkat Yekkirala

 I like these changes, but wondering why you haven't supplied 
 code for the 
 outbound case ?
 
 
 - James

The code for the outbound is still in the works. I hope to have it
out in a week or so.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 0/3] secid reconciliation-v01: Repost patchset with up dates

2006-08-25 Thread James Morris

On Fri, 25 Aug 2006, Venkat Yekkirala wrote:

  I like these changes, but wondering why you haven't supplied 
  code for the 
  outbound case ?
  
  
  - James
 
 The code for the outbound is still in the works. I hope to have it
 out in a week or so.

Ok, I guess we should wait until then before incorporating the patches 
(also, for Paul Moore to return and comment re. CIPSO).


- James
-- 
James Morris
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 44/44] [XFRM] IPV6: Support Mobile IPv6 extension headers sorting.

2006-08-25 Thread Masahide NAKAMURA

On Fri, 25 Aug 2006 03:16:51 -0700 (PDT)
David Miller [EMAIL PROTECTED] wrote:

 From: Masahide NAKAMURA [EMAIL PROTECTED]
 Date: Fri, 25 Aug 2006 19:06:40 +0900

  I've found a problem about MIPv6 CN with the patch below.

 We just need to search by priority in the inexact list, even
 if we get a hit in the hash table.

 The fix is trivial, please try this patch:

Thank you for providing it quickly.
It works! 

I continue my test with this patch for now to confirm other features.

Regards,

-- 
Masahide NAKAMURA

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NET_SCHED]: Add mask support to fwmark classifier

2006-08-25 Thread jamal

On Fri, 2006-25-08 at 14:02 +0200, Patrick McHardy wrote:
 Thomas Graf wrote:
  * Patrick McHardy [EMAIL PROTECTED] 2006-08-25 12:29
  
 The problem is that in order to avoid walking through all filters
 contained in one instance, we need to mask the value before the
 lookup. This means all filters share the same mask, which is
 taken from the first filter created and stored in the filter head.
 The user interface however always refers to a single filter,
 not the head, so it can't be changed afterwards unless we just
 overwrite it whenever a new filter is installed. Both is not
 really perfect. The current patch doesn't allow to change the
 mark and enforces that all filters use the same one, which I think
 is better than allowing inconsistent configurations.
  
  
  The other option gets down to replacing the hash table with a
  list and that's not an option in my opinion. This looks very
  good to me.
 

This doesn't obsolete my previous ack, but:

Another approach could have been to add the mask as part of the hashing.
and you add the new hash field not in the head rather in the filter. At
runtime, you hash - walk the bucket and compare the mask as well as the
index.

The above could be a future improvement. 

cheers,
jamal


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/3] [IPV6] ROUTE: Add support for fwmask in routing rules.

2006-08-25 Thread YOSHIFUJI Hideaki / 吉藤英明

[IPV6] ROUTE: Add support for fwmask in routing rules.

Add support for fwmark masks.
A mask of 0xFFFFFFFF is used when a mark value != 0 is sent without a mask.

Based on patch for net/ipv4/fib_rules.c by Patrick McHardy [EMAIL PROTECTED].

Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED]
---
 net/ipv6/fib6_rules.c |   24 ++--
 1 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 3d64c71..ee4aa43 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -28,6 +28,7 @@ struct fib6_rule
struct rt6key   dst;
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
u32 fwmark;
+   u32 fwmask;
 #endif
u8  tclass;
 };
@@ -128,7 +129,7 @@ static int fib6_rule_match(struct fib_ru
return 0;
 
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
-   if (r-fwmark  (r-fwmark != fl-fl6_fwmark))
+   if ((r-fwmark ^ fl-fl6_fwmark) & r-fwmask)
return 0;
 #endif
 
@@ -141,6 +142,7 @@ static struct nla_policy fib6_rule_polic
[FRA_SRC]   = { .minlen = sizeof(struct in6_addr) },
[FRA_DST]   = { .minlen = sizeof(struct in6_addr) },
[FRA_FWMARK]= { .type = NLA_U32 },
+   [FRA_FWMASK]= { .type = NLA_U32 },
[FRA_TABLE] = { .type = NLA_U32 },
 };
 
@@ -174,8 +176,20 @@ static int fib6_rule_configure(struct fi
   sizeof(struct in6_addr));
 
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
-   if (tb[FRA_FWMARK])
+   if (tb[FRA_FWMARK]) {
rule6-fwmark = nla_get_u32(tb[FRA_FWMARK]);
+   if (rule6-fwmark) {
+   /*
+* if the mark value is non-zero,
+* all bits are compared by default
+* unless a mask is explicitly specified.
+*/
+   rule6-fwmask = 0x;
+   }
+   }
+
+   if (tb[FRA_FWMASK])
+   rule6-fwmask = nla_get_u32(tb[FRA_FWMASK]);
 #endif
 
rule6-src.plen = frh-src_len;
@@ -212,6 +226,9 @@ static int fib6_rule_compare(struct fib_
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
if (tb[FRA_FWMARK]  (rule6-fwmark != nla_get_u32(tb[FRA_FWMARK])))
return 0;
+
+   if (tb[FRA_FWMASK]  (rule6-fwmask != nla_get_u32(tb[FRA_FWMASK])))
+   return 0;
 #endif
 
return 1;
@@ -238,6 +255,9 @@ static int fib6_rule_fill(struct fib_rul
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
if (rule6-fwmark)
NLA_PUT_U32(skb, FRA_FWMARK, rule6-fwmark);
+
+   if (rule6-fwmask || rule6-fwmark)
+   NLA_PUT_U32(skb, FRA_FWMASK, rule6-fwmask);
 #endif
 
return 0;

-- 
YOSHIFUJI Hideaki @ USAGI Project  [EMAIL PROTECTED]
GPG-FP  : 9022 65EB 1ECF 3AD1 0BDF  80D8 4807 F894 E062 0EEA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/3] [IPV6] ROUTE: Fix size of fib6_rule_policy.

2006-08-25 Thread YOSHIFUJI Hideaki / 吉藤英明

[IPV6] ROUTE: Fix size of fib6_rule_policy.

It should not be RTA_MAX+1 but FRA_MAX+1.

Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED]
---
 net/ipv6/fib6_rules.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b4cd5c0..3d64c71 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -135,7 +135,7 @@ #endif
return 1;
 }
 
-static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = {
+static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = {
[FRA_IFNAME]= { .type = NLA_STRING },
[FRA_PRIORITY]  = { .type = NLA_U32 },
[FRA_SRC]   = { .minlen = sizeof(struct in6_addr) },

-- 
YOSHIFUJI Hideaki @ USAGI Project  [EMAIL PROTECTED]
GPG-FP  : 9022 65EB 1ECF 3AD1 0BDF  80D8 4807 F894 E062 0EEA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/3] [IPV6] Policy Routing Updates

2006-08-25 Thread YOSHIFUJI Hideaki / 吉藤英明

Hello.

Here are some IPv6 policy routing fixes on top of the net-2.6.19 tree.

  [PATCH 1/3] [IPV6] ROUTE: Fix FWMARK support.
  [PATCH 2/3] [IPV6] ROUTE: Fix size of fib6_rule_policy.

If we accept Patrick's IPv4 fwmask patch, here's the one for IPv6.

  [PATCH 3/3] [IPV6] ROUTE: Add support for fwmask in routing rules.

-- 
YOSHIFUJI Hideaki @ USAGI Project  [EMAIL PROTECTED]
GPG-FP  : 9022 65EB 1ECF 3AD1 0BDF  80D8 4807 F894 E062 0EEA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/3] [IPV6] ROUTE: Fix FWMARK support.

2006-08-25 Thread YOSHIFUJI Hideaki / 吉藤英明

[IPV6] ROUTE: Fix FWMARK support.

- Add missing nla_policy entry.
- type of fwmark is u32, not u8.

Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED]
---
 net/ipv6/fib6_rules.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index aebd9e2..b4cd5c0 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -27,7 +27,7 @@ struct fib6_rule
struct rt6key   src;
struct rt6key   dst;
 #ifdef CONFIG_IPV6_ROUTE_FWMARK
-   u8  fwmark;
+   u32 fwmark;
 #endif
u8  tclass;
 };
@@ -140,6 +140,7 @@ static struct nla_policy fib6_rule_polic
[FRA_PRIORITY]  = { .type = NLA_U32 },
[FRA_SRC]   = { .minlen = sizeof(struct in6_addr) },
[FRA_DST]   = { .minlen = sizeof(struct in6_addr) },
+   [FRA_FWMARK]= { .type = NLA_U32 },
[FRA_TABLE] = { .type = NLA_U32 },
 };
 

-- 
YOSHIFUJI Hideaki @ USAGI Project  [EMAIL PROTECTED]
GPG-FP  : 9022 65EB 1ECF 3AD1 0BDF  80D8 4807 F894 E062 0EEA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/4] nbd: deadlock prevention for NBD

2006-08-25 Thread Peter Zijlstra


Use sk_set_vmio() on the nbd socket.

Limit each request to 1 page, so that the request throttling also limits the
number of in-flight pages and force the IO scheduler to NOOP as anything else
doesn't make sense anyway.

Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
Signed-off-by: Daniel Phillips [EMAIL PROTECTED]
---
 drivers/block/nbd.c |   18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

Index: linux-2.6/drivers/block/nbd.c
===
--- linux-2.6.orig/drivers/block/nbd.c
+++ linux-2.6/drivers/block/nbd.c
@@ -135,7 +135,6 @@ static int sock_xmit(struct socket *sock
spin_unlock_irqrestore(current-sighand-siglock, flags);
 
do {
-   sock-sk-sk_allocation = GFP_NOIO;
iov.iov_base = buf;
iov.iov_len = size;
msg.msg_name = NULL;
@@ -361,8 +360,16 @@ static void nbd_do_it(struct nbd_device 
 
BUG_ON(lo-magic != LO_MAGIC);
 
+   sk_adjust_memalloc(0, 1);
+   if (sk_set_vmio(lo-sock-sk))
+   printk(KERN_WARNING
+  failed to set SOCK_VMIO on NBD socket\n);
+
while ((req = nbd_read_stat(lo)) != NULL)
nbd_end_request(req);
+
+   sk_adjust_memalloc(0, -1);
+
return;
 }
 
@@ -525,6 +533,7 @@ static int nbd_ioctl(struct inode *inode
if (S_ISSOCK(inode-i_mode)) {
lo-file = file;
lo-sock = SOCKET_I(inode);
+   lo-sock-sk-sk_allocation = GFP_NOIO;
error = 0;
} else {
fput(file);
@@ -628,11 +637,16 @@ static int __init nbd_init(void)
 * every gendisk to have its very own request_queue struct.
 * These structs are big so we dynamically allocate them.
 */
-   disk-queue = blk_init_queue(do_nbd_request, nbd_lock);
+   disk-queue = blk_init_queue_node_elv(do_nbd_request,
+   nbd_lock, -1, noop);
if (!disk-queue) {
put_disk(disk);
goto out;
}
+   blk_queue_pin_elevator(disk-queue);
+   blk_queue_max_segment_size(disk-queue, PAGE_SIZE);
+   blk_queue_max_hw_segments(disk-queue, 1);
+   blk_queue_max_phys_segments(disk-queue, 1);
}
 
if (register_blkdev(NBD_MAJOR, nbd)) {
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/4] blkdev: iosched selection for queue creation

2006-08-25 Thread Peter Zijlstra


Provide a block queue init function that allows setting an elevator,
and a function to pin the current elevator.

Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
Signed-off-by: Daniel Phillips [EMAIL PROTECTED]
---
 block/elevator.c   |5 +
 block/ll_rw_blk.c  |   12 ++--
 include/linux/blkdev.h |9 +
 3 files changed, 24 insertions(+), 2 deletions(-)

Index: linux-2.6/block/ll_rw_blk.c
===
--- linux-2.6.orig/block/ll_rw_blk.c
+++ linux-2.6/block/ll_rw_blk.c
@@ -1899,6 +1899,14 @@ EXPORT_SYMBOL(blk_init_queue);
 request_queue_t *
 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 {
+   return blk_init_queue_node_elv(rfn, lock, node_id, NULL);
+}
+EXPORT_SYMBOL(blk_init_queue_node);
+
+request_queue_t *
+blk_init_queue_node_elv(request_fn_proc *rfn, spinlock_t *lock, int node_id,
+   char *elv_name)
+{
request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
 
if (!q)
@@ -1939,7 +1947,7 @@ blk_init_queue_node(request_fn_proc *rfn
/*
 * all done
 */
-   if (!elevator_init(q, NULL)) {
+   if (!elevator_init(q, elv_name)) {
blk_queue_congestion_threshold(q);
return q;
}
@@ -1947,7 +1955,7 @@ blk_init_queue_node(request_fn_proc *rfn
blk_put_queue(q);
return NULL;
 }
-EXPORT_SYMBOL(blk_init_queue_node);
+EXPORT_SYMBOL(blk_init_queue_node_elv);
 
 int blk_get_queue(request_queue_t *q)
 {
Index: linux-2.6/include/linux/blkdev.h
===
--- linux-2.6.orig/include/linux/blkdev.h
+++ linux-2.6/include/linux/blkdev.h
@@ -444,6 +444,12 @@ struct request_queue
 #define QUEUE_FLAG_REENTER 6   /* Re-entrancy avoidance */
 #define QUEUE_FLAG_PLUGGED 7   /* queue is plugged */
 #define QUEUE_FLAG_ELVSWITCH   8   /* don't use elevator, just do FIFO */
+#define QUEUE_FLAG_ELVPINNED   9   /* pin the current elevator */
+
+static inline void blk_queue_pin_elevator(struct request_queue *q)
+{
+   set_bit(QUEUE_FLAG_ELVPINNED, q-queue_flags);
+}
 
 enum {
/*
@@ -696,6 +702,9 @@ static inline void elv_dispatch_add_tail
 /*
  * Access functions for manipulating queue properties
  */
+extern request_queue_t *blk_init_queue_node_elv(request_fn_proc *rfn,
+   spinlock_t *lock, int node_id,
+   char *elv_name);
 extern request_queue_t *blk_init_queue_node(request_fn_proc *rfn,
spinlock_t *lock, int node_id);
 extern request_queue_t *blk_init_queue(request_fn_proc *, spinlock_t *);
Index: linux-2.6/block/elevator.c
===
--- linux-2.6.orig/block/elevator.c
+++ linux-2.6/block/elevator.c
@@ -861,6 +861,11 @@ ssize_t elv_iosched_store(request_queue_
size_t len;
struct elevator_type *e;
 
+   if (test_bit(QUEUE_FLAG_ELVPINNED, q-queue_flags)) {
+   printk(KERN_NOTICE elevator: cannot switch elevator, 
pinned\n);
+   return count;
+   }
+
elevator_name[sizeof(elevator_name) - 1] = '\0';
strncpy(elevator_name, name, sizeof(elevator_name) - 1);
len = strlen(elevator_name);
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 4/4] nfs: deadlock prevention for NFS

2006-08-25 Thread Peter Zijlstra


Provide a proper a_ops->swapfile() implementation for NFS. This will
set the NFS socket to SOCK_VMIO and put the socket reconnection under
PF_MEMALLOC (I hope this is enough, otherwise more work needs to be done).

Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
---
 fs/nfs/file.c   |   21 -
 include/linux/sunrpc/xprt.h |4 +++-
 net/sunrpc/xprtsock.c   |   16 
 3 files changed, 39 insertions(+), 2 deletions(-)

Index: linux-2.6/fs/nfs/file.c
===
--- linux-2.6.orig/fs/nfs/file.c
+++ linux-2.6/fs/nfs/file.c
@@ -27,6 +27,7 @@
 #include linux/slab.h
 #include linux/pagemap.h
 #include linux/smp_lock.h
+#include net/sock.h
 
 #include asm/uaccess.h
 #include asm/system.h
@@ -317,7 +318,25 @@ static int nfs_release_page(struct page 
 
 static int nfs_swapfile(struct address_space *mapping, int enable)
 {
-   return 0;
+   int err = -EINVAL;
+   struct rpc_clnt *client = NFS_CLIENT(mapping-host);
+   struct sock *sk = client-cl_xprt-inet;
+
+   if (enable) {
+   client-cl_xprt-swapper = 1;
+   /*
+* keep one extra sock reference so the reserve won't dip
+* when the socket gets reconnected.
+*/
+   sk_adjust_memalloc(1, 1);
+   err = sk_set_vmio(sk);
+   } else if (client-cl_xprt-swapper) {
+   client-cl_xprt-swapper = 0;
+   sk_adjust_memalloc(-1, -1);
+   err = sk_clear_vmio(sk);
+   }
+
+   return err;
 }
 
 const struct address_space_operations nfs_file_aops = {
Index: linux-2.6/net/sunrpc/xprtsock.c
===
--- linux-2.6.orig/net/sunrpc/xprtsock.c
+++ linux-2.6/net/sunrpc/xprtsock.c
@@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void *
 {
struct rpc_xprt *xprt = (struct rpc_xprt *) args;
struct socket *sock = xprt-sock;
+   unsigned long pflags = current-flags;
int err, status = -EIO;
 
if (xprt-shutdown || xprt-addr.sin_port == 0)
@@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void *
 
dprintk(RPC:  xs_udp_connect_worker for xprt %p\n, xprt);
 
+   if (xprt-swapper)
+   current-flags |= PF_MEMALLOC;
+
/* Start by resetting any existing state */
xs_close(xprt);
 
@@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void *
xprt-sock = sock;
xprt-inet = sk;
 
+   if (xprt-swapper)
+   sk_set_vmio(sk);
+
write_unlock_bh(sk-sk_callback_lock);
}
xs_udp_do_set_buffer_size(xprt);
@@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void *
 out:
xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
+   current-flags = pflags;
 }
 
 /*
@@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void *
 {
struct rpc_xprt *xprt = (struct rpc_xprt *)args;
struct socket *sock = xprt-sock;
+   unsigned long pflags = current-flags;
int err, status = -EIO;
 
if (xprt-shutdown || xprt-addr.sin_port == 0)
goto out;
 
+   if (xprt-swapper)
+   current-flags |= PF_MEMALLOC;
+
dprintk(RPC:  xs_tcp_connect_worker for xprt %p\n, xprt);
 
if (!xprt-sock) {
@@ -1170,10 +1182,14 @@ static void xs_tcp_connect_worker(void *
break;
}
}
+
+   if (xprt-swapper)
+   sk_set_vmio(xprt-inet);
 out:
xprt_wake_pending_tasks(xprt, status);
 out_clear:
xprt_clear_connecting(xprt);
+   current-flags = pflags;
 }
 
 /**
Index: linux-2.6/include/linux/sunrpc/xprt.h
===
--- linux-2.6.orig/include/linux/sunrpc/xprt.h
+++ linux-2.6/include/linux/sunrpc/xprt.h
@@ -147,7 +147,9 @@ struct rpc_xprt {
unsigned intmax_reqs;   /* total slots */
unsigned long   state;  /* transport state */
unsigned char   shutdown   : 1, /* being shut down */
-   resvport   : 1; /* use a reserved port */
+   resvport   : 1, /* use a reserved port */
+   swapper: 1; /* we're swapping over this
+  transport */
 
/*
 * XID
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/4] VM deadlock prevention -v5

2006-08-25 Thread Peter Zijlstra

Hi,

The latest version of the VM deadlock prevention work.

The basic premise is that network sockets serving the VM need undisturbed
functionality in the face of severe memory shortage.

This patch-set provides the framework to provide this.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/4] net: VM deadlock avoidance framework

2006-08-25 Thread Peter Zijlstra


The core of the VM deadlock avoidance framework.

In order to provide robust networked block devices there must be a guarantee
of progress. That is, the block device must never stall because of OOM because
the device itself might be needed to get out of OOM (reclaim pageout).

This means that the device queue must always be unpluggable, this in turn means
that it must always find enough memory to build/send packets over the network
_and_ receive ACKs for those packets.

The network stack has a huge capacity for buffering packets; waiting for 
user-space to read them. There is a practical limit imposed to avoid DoS 
scenarios. These two things make for a deadlock; what if the receive limit is
reached and all packets are buffered in non-critical sockets (those not serving
the network block device waiting for an ACK to free a page). 

Memory pressure will add to that; what if there is simply no memory left to
receive packets in.

This patch provides a service to register sockets as critical; SOCK_VMIO
is a promise the socket will never block on receive. Along with a memory
reserve that will service a limited number of packets this can guarantee full
service to these critical sockets.

When we make sure that packets allocated from the reserve will only service
critical sockets we will not lose the memory and can guarantee progress.

Since memory is tight and the reserve modest, we do not want to lose memory to
fragmentation effects. Hence a very simple allocator is used to guarantee that
the memory used for each packet is returned to the page allocator.

Converted protocols:
IPv4 & IPv6:
 - icmp
 - udp
 - tcp
IPv4:
 - igmp

Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
Signed-off-by: Daniel Phillips [EMAIL PROTECTED]
---
 include/linux/gfp.h|3 -
 include/linux/mmzone.h |1 
 include/linux/skbuff.h |   13 --
 include/net/sock.h |   37 +
 mm/page_alloc.c|   41 ++-
 net/core/skbuff.c  |  103 ++---
 net/core/sock.c|   97 ++
 net/ipv4/af_inet.c |3 +
 net/ipv4/icmp.c|3 +
 net/ipv4/igmp.c|3 +
 net/ipv4/tcp_ipv4.c|3 +
 net/ipv4/udp.c |8 +++
 net/ipv6/af_inet6.c|3 +
 net/ipv6/icmp.c|3 +
 net/ipv6/tcp_ipv6.c|3 +
 net/ipv6/udp.c |3 +
 16 files changed, 305 insertions(+), 22 deletions(-)

Index: linux-2.6/include/linux/gfp.h
===
--- linux-2.6.orig/include/linux/gfp.h
+++ linux-2.6/include/linux/gfp.h
@@ -46,6 +46,7 @@ struct vm_area_struct;
 #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on 
success */
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x1u) /* Don't use emergency 
reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x2u) /* Enforce hardwall cpuset 
memory allocs */
+#define __GFP_EMERG  ((__force gfp_t)0x4u) /* Use emergency reserves */
 
 #define __GFP_BITS_SHIFT 20/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1  __GFP_BITS_SHIFT) - 1))
@@ -54,7 +55,7 @@ struct vm_area_struct;
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-   __GFP_NOMEMALLOC|__GFP_HARDWALL)
+   __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_EMERG)
 
 /* This equals 0, but use constants in case they ever change */
 #define GFP_NOWAIT (GFP_ATOMIC  ~__GFP_HIGH)
Index: linux-2.6/include/linux/mmzone.h
===
--- linux-2.6.orig/include/linux/mmzone.h
+++ linux-2.6/include/linux/mmzone.h
@@ -420,6 +420,7 @@ int percpu_pagelist_fraction_sysctl_hand
void __user *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
struct file *, void __user *, size_t *, loff_t *);
+int adjust_memalloc_reserve(int bytes);
 
 #include linux/topology.h
 /* Returns the number of the current Node. */
Index: linux-2.6/include/linux/skbuff.h
===
--- linux-2.6.orig/include/linux/skbuff.h
+++ linux-2.6/include/linux/skbuff.h
@@ -282,7 +282,8 @@ struct sk_buff {
nfctinfo:3;
__u8pkt_type:3,
fclone:2,
-   ipvs_property:1;
+   ipvs_property:1,
+   emerg:1;
__be16  protocol;
 
void(*destructor)(struct sk_buff *skb);
@@ -327,10 +328,13 @@ struct sk_buff {
 
 #include asm/system.h
 
+#define SKB_ALLOC_FCLONE   0x01
+#define SKB_ALLOC_RX   0x02

Re: [PATCH 0/4] VM deadlock prevention -v5

2006-08-25 Thread Christoph Lameter

On Fri, 25 Aug 2006, Peter Zijlstra wrote:

 The basic premises is that network sockets serving the VM need undisturbed
 functionality in the face of severe memory shortage.
 
 This patch-set provides the framework to provide this.

Hmmm.. Is it not possible to avoid the memory pools by 
guaranteeing that a certain number of pages is easily reclaimable?
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/7] [DCCP]: Fixes and enhancements

2006-08-25 Thread Arnaldo Carvalho de Melo


On 8/24/06, Ian McDonald [EMAIL PROTECTED] wrote:

 I spent all of today on USAGI's IPSEC/MIPV6 patches and related
 issues, so I'll look into this tomorrow.

 Thanks Ian.

Yes I saw that. Take your time as this is nowhere near as important!


Sigh, I'm still busy indeed, gave a quick look at the series and up to
the 10th patch it's OK at first sight, should improve the current
situation, thanks.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/4] VM deadlock prevention -v5

2006-08-25 Thread Peter Zijlstra

On Fri, 2006-08-25 at 08:51 -0700, Christoph Lameter wrote:
 On Fri, 25 Aug 2006, Peter Zijlstra wrote:
 
  The basic premises is that network sockets serving the VM need undisturbed
  functionality in the face of severe memory shortage.
  
  This patch-set provides the framework to provide this.
 
 Hmmm.. Is it not possible to avoid the memory pools by 
 guaranteeing that a certain number of page is easily reclaimable?

We're not actually using mempools, but the memalloc reserve. Purely easy
reclaimable memory is not enough however, since packet receive happens
from IRQ context, and we cannot unmap pages in IRQ context.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/4] VM deadlock prevention -v5

2006-08-25 Thread Rik van Riel


Christoph Lameter wrote:

On Fri, 25 Aug 2006, Peter Zijlstra wrote:


The basic premises is that network sockets serving the VM need undisturbed
functionality in the face of severe memory shortage.

This patch-set provides the framework to provide this.


Hmmm.. Is it not possible to avoid the memory pools by 
guaranteeing that a certain number of page is easily reclaimable?


No.

You need to guarantee that the memory is not gobbled up by
another subsystem, but remains available for use by *this*
subsystem.  Otherwise you could still deadlock.

--
What is important?  What you want to be true, or what is true?
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [GIT PATCH] IPv6 Updates for net-2.6.19

2006-08-25 Thread Thomas Graf

* YOSHIFUJI Hideaki / ?$B5HF#1QL@ [EMAIL PROTECTED] 2006-08-25 17:21
 commit 10204d532f5f8bb379009ba0bee2113bafda72be
 Author: YOSHIFUJI Hideaki [EMAIL PROTECTED]
 Date:   Mon Aug 21 19:22:01 2006 +0900
 
 [IPV6] ROUTE: Routing by FWMARK.
 
 Based on patch by Jean Lorchat [EMAIL PROTECTED].
 
 Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED]
 
 diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
 index 19a82b6..2987549 100644
 --- a/include/linux/fib_rules.h
 +++ b/include/linux/fib_rules.h
 @@ -34,7 +34,7 @@ enum
   FRA_UNUSED3,
   FRA_UNUSED4,
   FRA_UNUSED5,
 - FRA_FWMARK, /* netfilter mark (IPv4) */
 + FRA_FWMARK, /* netfilter mark (IPv4/IPv6) */
   FRA_FLOW,   /* flow/class id */
   FRA_UNUSED6,
   FRA_UNUSED7,

You're missing the validation policy entry for FRA_FWMARK
in the IPv6 policy.

 diff --git a/include/net/flow.h b/include/net/flow.h
 index e052291..3ca210e 100644
 --- a/include/net/flow.h
 +++ b/include/net/flow.h
 @@ -26,6 +26,7 @@ struct flowi {
   struct {
   struct in6_addr daddr;
   struct in6_addr saddr;
 + __u32   fwmark;
   __u32   flowlabel;
   } ip6_u;

Since all flowi users now use fwmark it can be moved out
of the union.

 diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
 index 91f6233..aebd9e2 100644
 --- a/net/ipv6/fib6_rules.c
 +++ b/net/ipv6/fib6_rules.c
 @@ -26,6 +26,9 @@ struct fib6_rule
   struct fib_rule common;
   struct rt6key   src;
   struct rt6key   dst;
 +#ifdef CONFIG_IPV6_ROUTE_FWMARK
 + u8  fwmark;
 +#endif
   u8  tclass;
  };

This doesn't look right.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/3] [IPV6] Policy Routing Updates

2006-08-25 Thread Thomas Graf

* YOSHIFUJI Hideaki / ?$B5HF#1QL@ [EMAIL PROTECTED] 2006-08-26 00:08
 Hello.
 
 Here's some IPv6 policy rouging fixes on top of net-2.6.19 tree.
 
   [PATCH 1/3] [IPV6] ROUTE: Fix FWMARK support.
   [PATCH 2/3] [IPV6] ROUTE: Fix size of fib6_rule_policy.
 
 If we accept Patrick's IPv4 fwmask patch, here's the one for IPv6.
 
   [PATCH 3/3] [IPV6] ROUTE: Add support for fwmask in routing rules.

Haven't noticed them in time, ignore my comments on the previous
patches.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[IPV6] Q: corrupt checksums when transferring data

2006-08-25 Thread Brandeburg, Jesse

I'm enabling e1000 to offload IPv6 since the 2.6.18+ kernels support it.
The kernel I'm testing is 2.6.18-rc4.  

Everything with the hardware offload is working fine, but it appears
that the GSO code may not correctly segment frames sometimes for IPv6
traffic.
I did a tcpdump on both ends with all hardware offloading disabled
through ethtool.  Here is what I got, note the long frame and then the
retransmit.

Has this problem been addressed already?  I'll compile and test a couple
newer kernels, any suggested target patches or kernels would be
appreciated.

Sender:
===
15:56:28.769034 bk1-6.33541  lh2-6.12865: S 3200244805:3200244805(0)
win 5760 mss 1440,sackOK,timestamp 64767859 0,nop,wscale 7
15:56:28.769042 lh2-6.12865  bk1-6.33541: S 1558653050:1558653050(0)
ack 3200244806 win 5712 mss 1440,sackOK,timestamp 172654320
64767859,nop,wscale 7
15:56:28.769102 bk1-6.33541  lh2-6.12865: . ack 1 win 45
nop,nop,timestamp 64767859 172654320
15:56:28.769350 bk1-6.33541  lh2-6.12865: P 1:257(256) ack 1 win 45
nop,nop,timestamp 64767859 172654320
15:56:28.769381 lh2-6.12865  bk1-6.33541: . ack 257 win 53
nop,nop,timestamp 172654320 64767859
15:56:28.769731 lh2-6.12865  bk1-6.33541: P 1:257(256) ack 257 win 53
nop,nop,timestamp 172654320 64767859
15:56:28.769851 bk1-6.33541  lh2-6.12865: . ack 257 win 54
nop,nop,timestamp 64767860 172654320
15:56:28.769860 bk1-6.46315  lh2-6.35704: S 3205139672:3205139672(0)
win 5760 mss 1440,sackOK,timestamp 64767860 0,nop,wscale 7
15:56:28.769873 lh2-6.35704  bk1-6.46315: S 1557432368:1557432368(0)
ack 3205139673 win 5712 mss 1440,sackOK,timestamp 172654320
64767860,nop,wscale 7
15:56:28.769975 bk1-6.46315  lh2-6.35704: . ack 1 win 45
nop,nop,timestamp 64767860 172654320
15:56:28.770009 lh2-6.35704  bk1-6.46315: . 1:2857(2856) ack 1 win 45
nop,nop,timestamp 172654320 64767860
15:56:28.972354 lh2-6.35704  bk1-6.46315: . 1:1429(1428) ack 1 win 45
nop,nop,timestamp 172654371 64767860
15:56:28.972478 bk1-6.46315  lh2-6.35704: . ack 1429 win 68
nop,nop,timestamp 64767910 172654371
15:56:28.972493 lh2-6.35704  bk1-6.46315: . 1429:2857(1428) ack 1 win
45 nop,nop,timestamp 172654371 64767910
15:56:28.972602 bk1-6.46315  lh2-6.35704: . ack 2857 win 90
nop,nop,timestamp 64767910 172654371
15:56:28.972611 lh2-6.35704  bk1-6.46315: . 2857:4285(1428) ack 1 win
45 nop,nop,timestamp 172654371 64767910
15:56:28.972727 bk1-6.46315  lh2-6.35704: . ack 4285 win 112
nop,nop,timestamp 64767910 172654371
15:56:28.972735 lh2-6.35704  bk1-6.46315: . 4285:5713(1428) ack 1 win
45 nop,nop,timestamp 172654371 64767910
15:56:28.972742 lh2-6.35704  bk1-6.46315: . 5713:7141(1428) ack 1 win
45 nop,nop,timestamp 172654371 64767910
15:56:28.972853 bk1-6.46315  lh2-6.35704: . ack 5713 win 135
nop,nop,timestamp 64767910 172654371
15:56:28.972862 lh2-6.35704  bk1-6.46315: . 7141:8569(1428) ack 1 win
45 nop,nop,timestamp 172654371 64767910
15:56:28.972868 lh2-6.35704  bk1-6.46315: . 8569:9997(1428) ack 1 win
45 nop,nop,timestamp 172654371 64767910

Receiver:
=
15:56:28.764058 bk1-6.33541  lh2-6.12865: S 3200244805:3200244805(0)
win 5760 mss 1440,sackOK,timestamp 64767859 0,nop,wscale 7
15:56:28.764181 lh2-6.12865  bk1-6.33541: S 1558653050:1558653050(0)
ack 3200244806 win 5712 mss 1440,sackOK,timestamp 172654320
64767859,nop,wscale 7
15:56:28.764205 bk1-6.33541  lh2-6.12865: . ack 1 win 45
nop,nop,timestamp 64767859 172654320
15:56:28.764441 bk1-6.33541  lh2-6.12865: P 1:257(256) ack 1 win 45
nop,nop,timestamp 64767859 172654320
15:56:28.764552 lh2-6.12865  bk1-6.33541: . ack 257 win 53
nop,nop,timestamp 172654320 64767859
15:56:28.764926 lh2-6.12865  bk1-6.33541: P 1:257(256) ack 257 win 53
nop,nop,timestamp 172654320 64767859
15:56:28.764936 bk1-6.33541  lh2-6.12865: . ack 257 win 54
nop,nop,timestamp 64767860 172654320
15:56:28.764962 bk1-6.46315  lh2-6.35704: S 3205139672:3205139672(0)
win 5760 mss 1440,sackOK,timestamp 64767860 0,nop,wscale 7
15:56:28.765052 lh2-6.35704  bk1-6.46315: S 1557432368:1557432368(0)
ack 3205139673 win 5712 mss 1440,sackOK,timestamp 172654320
64767860,nop,wscale 7
15:56:28.765061 bk1-6.46315  lh2-6.35704: . ack 1 win 45
nop,nop,timestamp 64767860 172654320
15:56:28.765300 lh2-6.35704  bk1-6.46315: . 1:1429(1428) ack 1 win 45
nop,nop,timestamp 172654320 64767860
15:56:28.765306 lh2-6.35704  bk1-6.46315: . 1429:2857(1428) ack 1 win
45 nop,nop,timestamp 172654320 64767860
15:56:28.967565 lh2-6.35704  bk1-6.46315: . 1:1429(1428) ack 1 win 45
nop,nop,timestamp 172654371 64767860
15:56:28.967581 bk1-6.46315  lh2-6.35704: . ack 1429 win 68
nop,nop,timestamp 64767910 172654371
15:56:28.967691 lh2-6.35704  bk1-6.46315: . 1429:2857(1428) ack 1 win
45 nop,nop,timestamp 172654371 64767910
15:56:28.967702 bk1-6.46315  lh2-6.35704: . ack 2857 win 90
nop,nop,timestamp 64767910 172654371
15:56:28.967816 lh2-6.35704  bk1-6.46315: . 2857:4285(1428) ack 1 win
45 nop,nop,timestamp 172654371 64767910
15:56:28.967826 bk1-6.46315  lh2-6.35704: . ack 4285 win 112
nop,nop,timestamp 64767910

Re: [PATCH 2.6.17 2/9] NetXen: Hardware access routines

2006-08-25 Thread Sanjeev Jorapur


On Mon, 2006-08-21 at 07:03 -0700, Stephen Hemminger wrote:
 On Mon, 21 Aug 2006 13:57:23 +0530
 Amit S. Kale [EMAIL PROTECTED] wrote:
 
  We can certainly create a table for all error messages. It'll hurt 
  readability 
  of code in many of the other places where printks are used to indicate some 
  hardware error.
  -Amit
 
 My suggestion was intended as an way to handle multiple driver versions
 all using the same firmware or vice versa. By locking the firmware and
 driver version together you might make maintenance more difficult.

We misunderstood your earlier comment. The compatible driver & firmware
images have the same major & minor version numbers. Only the sub-version
numbers may be different. This gives us more flexibility in releasing
driver & firmware fixes.

Sanjeev.


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 01/3] d80211: add support for SIOCSIWRATE, SIOCSIWTXPOW and SIOCSIWPOWER

2006-08-25 Thread Jouni Malinen

On Mon, Aug 21, 2006 at 05:30:22PM -0700, Mohamed Abbas wrote:

 the attached patch will add support to handle these iw_handle 
 SIOC[S/G]IWRATE, SIOC[S/G]IWTXPOW and SIOC[S/G]IWPOWER.  It also  added 
 some changes in ieee80211_ioctl_giwrange function to report supported 
 channels and rates.  a call to ieee80211_hw_config is needed to infor 
 the low level driver about these changes, I guess we might need to add 
 flag to indicate which parameters was changed so the low level driver 
 does not need to make extra calls.

Could you please separate SIOCSIWRATE from the rest. I did not go
through the details yet, but I do not think the proposed change here
would match the way rate control was designed in the Devicescape
stack and I would not like to see this getting in before more careful
review and explanation of how this is expected to work. The way I see
rate settings working is that the control would be applied to the rate
control algorithm and not to the list of rates itself.

-- 
Jouni MalinenPGP id EFC895FA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RTL8136

2006-08-25 Thread Darren Salt

In case you don't yet have an lspci dump for an RTL8136, here's one for a
device which is working with the r1000 driver which is supplied with Ubuntu
dapper (though the machine in question - a Toshiba Equium A110-233 - is
actually running Debian testing.)

I _should_ be able to test your patches once I've got a locally-compiled
kernel working for it.

[M-F-T set; not subscribed.]

-- 
| Darren Salt| linux or ds at  | nr. Ashington, | Toon
| RISC OS, Linux | youmustbejoking,demon,co,uk | Northumberland | Army
| + At least 4000 million too many people. POPULATION LEVEL IS UNSUSTAINABLE.

If no one uses it, there's a reason.
05:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. Unknown device 
8136 (rev 01)
Subsystem: Toshiba America Info Systems Unknown device ff00
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- 
Stepping- SERR- FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- 
MAbort- SERR- PERR-
Latency: 0, Cache Line Size: 64 bytes
Interrupt: pin A routed to IRQ 185
Region 0: I/O ports at 4000 [size=256]
Region 2: Memory at da00 (64-bit, non-prefetchable) [size=4K]
[virtual] Expansion ROM at d400 [disabled] [size=64K]
Capabilities: [40] Power Management version 2
Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=375mA 
PME(D0-,D1+,D2+,D3hot+,D3cold+)
Status: D0 PME-Enable- DSel=0 DScale=0 PME+
Capabilities: [48] Vital Product Data
Capabilities: [50] Message Signalled Interrupts: 64bit+ Queue=0/1 
Enable-
Address:   Data: 
Capabilities: [60] Express Endpoint IRQ 0
Device: Supported: MaxPayload 128 bytes, PhantFunc 0, ExtTag+
Device: Latency L0s 1us, L1 unlimited
Device: AtnBtn+ AtnInd+ PwrInd+
Device: Errors: Correctable- Non-Fatal- Fatal- Unsupported-
Device: RlxdOrd- ExtTag- PhantFunc- AuxPwr- NoSnoop-
Device: MaxPayload 128 bytes, MaxReadReq 128 bytes
Link: Supported Speed 2.5Gb/s, Width x1, ASPM L0s, Port 0
Link: Latency L0s unlimited, L1 unlimited
Link: ASPM Disabled RCB 64 bytes CommClk+ ExtSynch-
Link: Speed 2.5Gb/s, Width x1
Capabilities: [84] Vendor Specific Information
Capabilities: [100] Advanced Error Reporting
Capabilities: [12c] Virtual Channel
Capabilities: [148] Device Serial Number 36-81-ec-10-00-00-10-01
Capabilities: [154] Power Budgeting

Re: [PATCH 01/3] d80211: add support for SIOCSIWRATE, SIOCSIWTXPOW and SIOCSIWPOWER

2006-08-25 Thread Mohamed Abbas

I will separate each pair of S/G in separate patch so it will be easier 
to discuss each set separately. I will provide the patch this weekend 
taking into account the comments provided


Thanks
Mohamed
Jouni Malinen wrote:


On Mon, Aug 21, 2006 at 05:30:22PM -0700, Mohamed Abbas wrote:

 

the attached patch will add support to handle these iw_handle 
SIOC[S/G]IWRATE, SIOC[S/G]IWTXPOW and SIOC[S/G]IWPOWER.  It also  added 
some changes in ieee80211_ioctl_giwrange function to report supported 
channels and rates.  a call to ieee80211_hw_config is needed to infor 
the low level driver about these changes, I guess we might need to add 
flag to indicate which parameters was changed so the low level driver 
does not need to make extra calls.
   



Could you please separate SIOCSIWRATE from the rest. I did not go
through the details yet, but I do not think the proposed change here
would match the the way rate control was designed in the Devicescape
stack and I would not like to see this getting in before more careful
review and explanation of how this is expected to work. The way I see
rate settings working is that the control would be applied to the rate
control algorithm and not to the list of rates itself.

 


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [IPV6] Q: corrupt checksums when transferring data

2006-08-25 Thread Stephen Hemminger

On Fri, 25 Aug 2006 11:13:48 -0700
Brandeburg, Jesse [EMAIL PROTECTED] wrote:

 I'm enabling e1000 to offload IPv6 since the 2.6.18+ kernels support it.
 The kernel I'm testing is 2.6.18-rc4.  

Yes, something is wrong with the GSO code. I am bisecting this bug
http://bugzilla.kernel.org/show_bug.cgi?id=7050


It looks like GSO is handing an IPV6 segment down to the sky2 driver
even though it asks for only NETIF_F_TSO.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: RTL8136

2006-08-25 Thread Francois Romieu

Darren Salt [EMAIL PROTECTED] :
 In case you don't yet have an lspci dump for an RTL8136, here's one for a
 device which is working with the r1000 driver which is supplied with Ubuntu
 dapper (though the machine in question - a Toshiba Equium A110-233 - is
 actually running Debian testing.)

Thanks. The MM region was correctly guessed. If the driver does not work,
you can try to s/RTL_CFG_1/RTL_CFG_2/ for the 0x8136 entry in the
rtl8169_pci_tbl array.

-- 
Ueimor
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [GIT PATCH] IPv6 Updates for net-2.6.19

2006-08-25 Thread Brian Haley


diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c9f74c1..9b50e0c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -703,6 +703,7 @@ void ip6_route_input(struct sk_buff *skb
.ip6_u = {
.daddr = iph-daddr,
.saddr = iph-saddr,
+   .fwmark = skb-nfmark,
.flowlabel = (* (u32 *) iph)IPV6_FLOWINFO_MASK,
},
},


I can't build the latest 2.6.19-git with this patch, skb->nfmark 
requires CONFIG_NETFILTER, which isn't in my .config.  The obvious 
workaround is the patch below, but that might not be what you want.  Can 
send my .config if you need it.


-Brian

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9b50e0c..dc880cc 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -703,7 +703,9 @@ void ip6_route_input(struct sk_buff *skb
 			.ip6_u = {
 .daddr = iph-daddr,
 .saddr = iph-saddr,
+#ifdef CONFIG_NETFILTER
 .fwmark = skb-nfmark,
+#endif
 .flowlabel = (* (u32 *) iph)IPV6_FLOWINFO_MASK,
 			},
 		},

RE: [IPV6] Q: corrupt checksums when transferring data

2006-08-25 Thread Brandeburg, Jesse

Stephen Hemminger wrote:
 On Fri, 25 Aug 2006 11:13:48 -0700
 Brandeburg, Jesse [EMAIL PROTECTED] wrote:
 
 I'm enabling e1000 to offload IPv6 since the 2.6.18+ kernels support
 it. The kernel I'm testing is 2.6.18-rc4.
 
 Yes, something is wrong with the GSO code. I am bisecting this bug
   http://bugzilla.kernel.org/show_bug.cgi?id=7050
 
 
 It looks like GSO is handing an IPV6 segment down to the sky2 driver
 even though it asks for only NETIF_F_TSO.

Ah ha, I was wondering if that bug report on sky2 might be related to
this issue.  E1000 actually sends the data I think (it just has a bad
checksum) when handed a too long frame.  Seems like the stack should
never give us something longer than the MTU + enet header, esp with all
hardware offloads disabled.

So I have a very easy repro with netperf
on remote: netserver -4 -6
netperf -H lh2-6,6 -t TCP_MAERTS -- -m4K -S128K -s128K

The remote will generate the bad frames.

Jesse
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Fw: [Bugme-new] [Bug 7058] New: CONFIG_IP_ROUTE_FWMARK breaks rp_filter checks

2006-08-25 Thread Andrew Morton

Begin forwarded message:

Date: Fri, 25 Aug 2006 13:29:52 -0700
From: [EMAIL PROTECTED]
To: [EMAIL PROTECTED]
Subject: [Bugme-new] [Bug 7058] New: CONFIG_IP_ROUTE_FWMARK breaks rp_filter 
checks

http://bugzilla.kernel.org/show_bug.cgi?id=7058

   Summary: CONFIG_IP_ROUTE_FWMARK breaks rp_filter checks
Kernel Version: 2.6.17.8
Status: NEW
  Severity: normal
 Owner: [EMAIL PROTECTED]
 Submitter: [EMAIL PROTECTED]

Most recent kernel where this bug did not occur: bug present since at least
early 2.4.x
Distribution: debian
Hardware Environment: i386
Software Environment: router/firewall
Problem Description:
Using a fwmark as a key for selecting among multiple routing tables (via ip
rule command) breaks the rp_filter functionality since the fwmark field is not
initialized in function fib_validate_source. Because of this there is no way to
assure that outgoing and incoming packets use the same routing table.

Steps to reproduce:
You should set up a network environment where there are at least two different
links from the machine A to a remote host B, and firewall rules on A to mark
specific packets to this destination and back (say those destined to a certain
port only and corresponding replies). Set a default route from A to B using link
1 and a different routing table for marked packets using link 2 (e.g. ip rule
add fwmark 2 table 2; ip route table 2 add ...). (This is the setup used for a
VPN I manage)
When an incoming packet from link 2 arrives in fib_validate_source, the fwmark
field will not be set despite the presence of appropriate rules in the firewall,
and thus the wrong table will be used for the check causing the packet to be
refused.

I have prepared a small patch to resolve this issue. I've tested it for quite
some time and it worked flawlessly and without side effects. I'm pasting it
here: it just adds an argument to fib_validate_source so that it can set the
fwmark field and passes the proper value already present in every caller.

--- a/include/net/ip_fib.h  2006-08-09 20:08:14.0 +0200
+++ b/include/net/ip_fib.h  2006-08-09 19:44:44.0 +0200
@@ -234,7 +234,7 @@
 extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void 
*arg);
 extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void 
*arg);
 extern int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb);
-extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
+extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, u32 fwmark,
   struct net_device *dev, u32 *spec_dst, u32 
*itag);
 extern void fib_select_multipath(const struct flowi *flp, struct fib_result 
*res);

--- a/net/ipv4/fib_frontend.c   2006-08-07 06:18:54.0 +0200
+++ b/net/ipv4/fib_frontend.c   2006-08-09 19:43:45.0 +0200
@@ -160,14 +160,18 @@
- check, that packet arrived from expected physical interface.
  */

-int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
+int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, u32 fwmark,
struct net_device *dev, u32 *spec_dst, u32 *itag)
 {
struct in_device *in_dev;
struct flowi fl = { .nl_u = { .ip4_u =
  { .daddr = src,
.saddr = dst,
-   .tos = tos } },
+   .tos = tos,
+#ifdef CONFIG_IP_ROUTE_FWMARK
+   .fwmark = fwmark
+#endif
+ } },
.iif = oif };
struct fib_result res;
int no_addr, rpf;
--- a/net/ipv4/route.c  2006-08-09 20:08:47.0 +0200
+++ b/net/ipv4/route.c  2006-08-09 19:46:06.0 +0200
@@ -1606,6 +1606,11 @@
goto e_inval;
spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
} else if (fib_validate_source(saddr, 0, tos, 0,
+#ifdef CONFIG_IP_ROUTE_FWMARK
+   skb-nfmark,
+#else
+   0, /* no fwmark dependant routing */
+#endif
dev, spec_dst, itag)  0)
goto e_inval;

@@ -1720,6 +1725,11 @@

err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), 
+#ifdef CONFIG_IP_ROUTE_FWMARK
+ skb-nfmark,
+#else
+ 0, /* no fwmark dependant routing */
+#endif
  in_dev-dev, spec_dst, itag);
if (err  0) {
ip_handle_martian_source(in_dev-dev, in_dev, skb, daddr, 
@@ -1954,6 +1964,11 @@
int result;
result = fib_validate_source(saddr, daddr, tos,
 loopback_dev.ifindex,
+#ifdef CONFIG_IP_ROUTE_FWMARK
+

[patch] d80211: fix crash in ieee80211_rx_michael_mic_report()

2006-08-25 Thread David Kimdon

This fixes a crash at ieee80211.c line 3461, ieee80211_rx_michael_mic_report()
(rx->sdata->type == IEEE80211_IF_TYPE_AP).  rx.sdata needs to be set before
calling ieee80211_rx_michael_mic_report().

Signed-off-by: Elliot Schwartz [EMAIL PROTECTED]
Signed-off-by: David Kimdon [EMAIL PROTECTED]

Index: wireless-dev/net/d80211/ieee80211.c
===
--- wireless-dev.orig/net/d80211/ieee80211.c
+++ wireless-dev/net/d80211/ieee80211.c
@@ -3582,6 +3582,11 @@ void __ieee80211_rx(struct net_device *d
else
sta = rx.sta = NULL;
 
+   if (sta) {
+   rx.dev = sta-dev;
+   rx.sdata = IEEE80211_DEV_TO_SUB_IF(rx.dev);
+   }
+
 if ((status-flag  RX_FLAG_MMIC_ERROR)) {
ieee80211_rx_michael_mic_report(dev, hdr, sta, rx);
goto end;
@@ -3597,8 +3602,6 @@ void __ieee80211_rx(struct net_device *d
 
if (sta  !sta-assoc_ap  !(sta-flags  WLAN_STA_WDS) 
!local-iff_promiscs  !multicast) {
-   rx.dev = sta-dev;
-   rx.sdata = IEEE80211_DEV_TO_SUB_IF(rx.dev);
rx.u.rx.ra_match = 1;
ieee80211_invoke_rx_handlers(local, local-rx_handlers, rx,
 sta);

--
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [stable] [PATCH] bridge-netfilter: don't overwrite memory outside of skb

2006-08-25 Thread Greg KH

On Tue, Aug 22, 2006 at 05:19:28PM -0700, Stephen Hemminger wrote:
 The bridge netfilter code needs to check for space at the
 front of the skb before overwriting; otherwise if skb from
 device doesn't have headroom, then it will cause random
 memory corruption.
 
 Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]

Queued to -stable, thanks.

greg k-h
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

IPSec kernel oops on ppc64

2006-08-25 Thread Joy Latten

I installed 2.6.17 + patch-2.6.18-rc4 + 2.6.18-rc4-mm2
onto two pSeries power 5 (ppc64 lpars) machines. I configured
IPSec using the configuration listed below. 

A ping from one machine to the other, hangs. 
No packets leave the machine issuing the ping.
When I tried sftp, I received following oops.

Has anyone else had problems with IPSec on pSeries?

[EMAIL PROTECTED] jml]# sftp hvracer1
Connecting to hvracer1...
kernel BUG in skb_to_sgvec at net/xfrm/xfrm_algo.c:620!
cpu 0x0: Vector: 700 (Program Check) at [c000466eb240]
pc: c035f2f4: .skb_to_sgvec+0x288/0x2ec
lr: d09605e0: .esp_output+0x340/0x494 [esp4]
sp: c000466eb4c0
   msr: 80029032
  current = 0xc00045a69910
  paca= 0xc0484400
pid   = 2213, comm = ssh
kernel BUG in skb_to_sgvec at net/xfrm/xfrm_algo.c:620!
enter ? for help
0:mon t
[c000466eb590] d09605e0 .esp_output+0x340/0x494 [esp4]
[c000466eb680] c0357bd4 .xfrm4_output_finish2+0x2b8/0x3d0
[c000466eb720] c0357ea0 .xfrm4_output+0x74/0x88
[c000466eb7a0] c031b188 .ip_queue_xmit+0x4a8/0x540
[c000466eb8a0] c032e9b8 .tcp_transmit_skb+0x820/0x890
[c000466eb960] c0331b74 .tcp_connect+0x308/0x3b0
[c000466eba00] c03361d0 .tcp_v4_connect+0x52c/0x6c0
[c000466ebb80] c0344664 .inet_stream_connect+0x10c/0x358
[c000466ebc60] c02dba14 .sys_connect+0xd8/0x120
[c000466ebd90] c02fe420 .compat_sys_socketcall+0xdc/0x214
[c000466ebe30] c000871c syscall_exit+0x0/0x40
--- Exception: c00 (System Call) at 07a9f8fc
SP (fc63f230) is in userspace



Configured IPSec as follows:

add x.x.x.55 x.x.x.206 esp 35590
-m transport
-E 3des-cbc 06183223c23a21e8b36c566b
-A hmac-md5 TAHITEST89ABCDEF;

add x.x.x.206 x.x.x.55 esp 12360
-m transport
-E 3des-cbc 06183223c23a21e8b36c566b
-A hmac-md5 TAHITEST89ABCDEF;

spdadd x.x.x.55 x.x.x.206 any -P in ipsec
esp/transport//require;

spdadd x.x.x.206 x.x.x.55 any -P out ipsec
esp/transport//require;

Same config on both machines, except for  spdadd entry. The in and out
are swapped on the other machine.


Regards,
Joy Latten
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH -rt DO NOT APPLY] Fix for tg3 networking lockup

2006-08-25 Thread Marc Bevand

Michael Chan mchan at broadcom.com writes:

 Turning off ASF is just a matter of changing some bits in NVRAM
 and recalculating the checksum.  If you need the tool to do this,
 I'll have someone send it to you.

 Note that on some of the blade servers, I believe ASF is vital
 and should not be disabled.

Still, it would be great if ASF could be disabled, because I have
noticed that when ASF is enabled, the tg3 driver automatically disables
TSO (TCP Segmentation Offloading). Here is a dmesg output from a server
where I am seeing that behavior:

  eth0: Tigon3 [partno(BCM95704A6) rev 2100 PHY(5704)] (PCIX:133MHz:64-bit) \
10/100/1000BaseT Ethernet 00:30:48:59:c4:94
  eth0: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[1] Split[0] WireSpeed[1] TSOcap[0]
  [...]
  eth1: Tigon3 [partno(BCM95704A6) rev 2100 PHY(5704)] (PCIX:133MHz:64-bit) \
10/100/1000BaseT Ethernet 00:30:48:59:c4:95
  eth1: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[0] Split[0] WireSpeed[1] TSOcap[1]

Both interfaces are fundamentally TSO-capable, but since ASF is enabled
on eth0, tg3 disables TSO on this interface. Of course at this point it
is not even possible to use ethtool to re-enable it because the driver
considers eth0 as not TSO-capable at all.

As far as I know, the tg3 driver has been doing that since one of your
patches shipped with 2.6.11-rc2-bk3, Michael, see [1]. Here is the relevant
code snippet (line numbers are for 2.6.16):

  10835   if (tp-tg3_flags2  TG3_FLG2_HW_TSO) {
  10836   tp-tg3_flags2 |= TG3_FLG2_TSO_CAPABLE;
  10837   }
  10838   else if (GET_ASIC_REV(tp-pci_chip_rev_id) == ASIC_REV_5700 ||
  10839   GET_ASIC_REV(tp-pci_chip_rev_id) == ASIC_REV_5701 ||
  10840   tp-pci_chip_rev_id == CHIPREV_ID_5705_A0 ||
  10841   (tp-tg3_flags  TG3_FLAG_ENABLE_ASF) != 0) {
  10842   tp-tg3_flags2 = ~TG3_FLG2_TSO_CAPABLE;
  10843   } else {
  10844   tp-tg3_flags2 |= TG3_FLG2_TSO_CAPABLE;
  10845   }

The culprit is line 10841. Why is that done ?

[1] ftp://ftp.us.kernel.org:/pub/linux/kernel/v2.6/snapshots/old/
patch-2.6.11-rc2-bk3.log, patch-2.6.11-rc2-bk3.bz2

--
Marc Bevand


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [GIT PATCH] IPv6 Updates for net-2.6.19

2006-08-25 Thread YOSHIFUJI Hideaki / 吉藤英明

In article [EMAIL PROTECTED] (at Fri, 25 Aug 2006 15:29:25 -0400), Brian 
Haley [EMAIL PROTECTED] says:

  .saddr = iph-saddr,
  +   .fwmark = skb-nfmark,
  .flowlabel = (* (u32 *) iph)IPV6_FLOWINFO_MASK,
:
 I can't build the latest 2.6.19-git with this patch, skb->nfmark 
 requires CONFIG_NETFILTER, which isn't in my .config.  The obvious 
 workaround is the patch below, but that might not be what you want.  Can 
 send my .config if you need it.

Your fix is appropriate.

Acked-by: YOSHIFUJI Hideaki [EMAIL PROTECTED]

--yoshfuji
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/3] [IPV6] ROUTE: Fix FWMARK support.

2006-08-25 Thread David Miller

From: YOSHIFUJI Hideaki [EMAIL PROTECTED]
Date: Sat, 26 Aug 2006 00:08:54 +0900 (JST)

 [IPV6] ROUTE: Fix FWMARK support.

 - Add missing nla_policy entry.
 - type of fwmark is u32, not u8.

 Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED]

Applied, thanks.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/3] [IPV6] ROUTE: Fix size of fib6_rule_policy.

2006-08-25 Thread David Miller

From: YOSHIFUJI Hideaki [EMAIL PROTECTED]
Date: Sat, 26 Aug 2006 00:09:19 +0900 (JST)

 [IPV6] ROUTE: Fix size of fib6_rule_policy.

 It should not be RTA_MAX+1 but FRA_MAX+1.

 Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED]

Applied.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 3/3] [IPV6] ROUTE: Add support for fwmask in routing rules.

2006-08-25 Thread David Miller

From: YOSHIFUJI Hideaki [EMAIL PROTECTED]
Date: Sat, 26 Aug 2006 00:09:37 +0900 (JST)

 [IPV6] ROUTE: Add support for fwmask in routing rules.

 Add support for fwmark masks.
 A mask of 0xFFFFFFFF is used when a mark value != 0 is sent without a mask.

 Based on patch for net/ipv4/fib_rules.c by Patrick McHardy [EMAIL 
 PROTECTED].

 Signed-off-by: YOSHIFUJI Hideaki [EMAIL PROTECTED]

Applied.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [GIT PATCH] IPv6 Updates for net-2.6.19

2006-08-25 Thread David Miller

From: YOSHIFUJI Hideaki [EMAIL PROTECTED]
Date: Sat, 26 Aug 2006 07:44:38 +0900 (JST)

 In article [EMAIL PROTECTED] (at Fri, 25 Aug 2006 15:29:25 -0400), Brian 
 Haley [EMAIL PROTECTED] says:

 .saddr = iph-saddr,
   + .fwmark = skb-nfmark,
 .flowlabel = (* (u32 *) iph)IPV6_FLOWINFO_MASK,
 :
  I can't build the latest 2.6.19-git with this patch, skb->nfmark 
  requires CONFIG_NETFILTER, which isn't in my .config.  The obvious 
  workaround is the patch below, but that might not be what you want.  Can 
  send my .config if you need it.

 Your fix is appropriate.

 Acked-by: YOSHIFUJI Hideaki [EMAIL PROTECTED]

It seems to make better sense to protect this with
IPV6_ROUTE_FWMARK instead of NETFILTER.  And it is
consistent with ipv4 side.

So that's how I will fix this build problem.

Thanks.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [GIT PATCH] IPv6 Updates for net-2.6.19

2006-08-25 Thread YOSHIFUJI Hideaki / 吉藤英明

In article [EMAIL PROTECTED] (at Fri, 25 Aug 2006 16:06:58 -0700 (PDT)), 
David Miller [EMAIL PROTECTED] says:

+   .fwmark = skb-nfmark,
:
 It seems to make better sense to protect this with
 IPV6_ROUTE_FWMARK instead of NETFILTER.  And it is
 consistent with ipv4 side.
 
 So that's how I will fix this build problem.

Agreed.

--yoshfuji
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [IPV4]: Add support for fwmark masks in routing rules

2006-08-25 Thread David Miller

From: Patrick McHardy [EMAIL PROTECTED]
Date: Fri, 25 Aug 2006 14:14:05 +0200

 [IPV4]: Add support for fwmark masks in routing rules

 Add a FRA_FWMASK attributes for fwmark masks. For compatibility a mask of
 0xFFFFFFFF is used when a mark value != 0 is sent without a mask.

 Signed-off-by: Patrick McHardy [EMAIL PROTECTED]

Applied.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [DECNET]: Add support for fwmark masks in routing rules

2006-08-25 Thread David Miller

From: Patrick McHardy [EMAIL PROTECTED]
Date: Fri, 25 Aug 2006 14:14:12 +0200

 [DECNET]: Add support for fwmark masks in routing rules

 Add support for fwmark masks. For compatibility a mask of 0xFFFFFFFF is used
 when a mark value != 0 is sent without a mask.

 Signed-off-by: Patrick McHardy [EMAIL PROTECTED]

Applied.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [NET_SCHED]: Add mask support to fwmark classifier

2006-08-25 Thread David Miller

From: Patrick McHardy [EMAIL PROTECTED]
Date: Fri, 25 Aug 2006 14:14:18 +0200

 [NET_SCHED]: Add mask support to fwmark classifier

 Support masking the nfmark value before the search. The mask value is
 global for all filters contained in one instance. It can only be set
 when a new instance is created, all filters must specify the same mask.

 Signed-off-by: Patrick McHardy [EMAIL PROTECTED]

Applied, thanks a lot.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: IPSec kernel oops on ppc64

2006-08-25 Thread James Morris

On Fri, 25 Aug 2006, Joy Latten wrote:

 I installed 2.6.17 + patch-2.6.18-rc4 + 2.6.18-rc4-mm2
 onto two pSeries power 5 (ppc64 lpars) machines. I configured
 IPSec using the configuration listed below. 

Confirming that this does not crash on i686 or x86_64.



- james
-- 
James Morris
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: IPSec kernel oops on ppc64

2006-08-25 Thread David Miller

From: James Morris [EMAIL PROTECTED]
Date: Fri, 25 Aug 2006 19:15:57 -0400 (EDT)

 On Fri, 25 Aug 2006, Joy Latten wrote:

  I installed 2.6.17 + patch-2.6.18-rc4 + 2.6.18-rc4-mm2
  onto two pSeries power 5 (ppc64 lpars) machines. I configured
  IPSec using the configuration listed below. 

 Confirming that this does not crash on i686 or x86_64.

Probably best to start with retesting with 2.6.18-rc4, and
if that crashes too it is time to think seriously about
a miscompile on ppc64.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/4] nfs: deadlock prevention for NFS

2006-08-25 Thread Peter Zijlstra

On Fri, 2006-08-25 at 16:14 -0400, Trond Myklebust wrote:
 Grumble... If your patches are targetting NFS, could you please at the
 very least Cc [EMAIL PROTECTED] and/or myself.

Sorry, will make sure you're on the CC list next round.

 On Fri, 2006-08-25 at 17:40 +0200, Peter Zijlstra wrote:
  Provide a proper a_ops->swapfile() implementation for NFS. This will
  set the NFS socket to SOCK_VMIO and put the socket reconnection under
  PF_MEMALLOC (I hope this is enough, otherwise more work needs to be done).
  
  Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
  ---
   fs/nfs/file.c   |   21 -
   include/linux/sunrpc/xprt.h |4 +++-
   net/sunrpc/xprtsock.c   |   16 
   3 files changed, 39 insertions(+), 2 deletions(-)
  
  Index: linux-2.6/fs/nfs/file.c
  ===
  --- linux-2.6.orig/fs/nfs/file.c
  +++ linux-2.6/fs/nfs/file.c
  @@ -27,6 +27,7 @@
   #include linux/slab.h
   #include linux/pagemap.h
   #include linux/smp_lock.h
  +#include net/sock.h
   
   #include asm/uaccess.h
   #include asm/system.h
  @@ -317,7 +318,25 @@ static int nfs_release_page(struct page 
   
   static int nfs_swapfile(struct address_space *mapping, int enable)
   {
  -   return 0;
  +   int err = -EINVAL;
  +   struct rpc_clnt *client = NFS_CLIENT(mapping-host);
  +   struct sock *sk = client-cl_xprt-inet;
  +
  +   if (enable) {
  +   client-cl_xprt-swapper = 1;
  +   /*
  +* keep one extra sock reference so the reserve won't dip
  +* when the socket gets reconnected.
  +*/
  +   sk_adjust_memalloc(1, 1);
  +   err = sk_set_vmio(sk);
  +   } else if (client-cl_xprt-swapper) {
  +   client-cl_xprt-swapper = 0;
  +   sk_adjust_memalloc(-1, -1);
  +   err = sk_clear_vmio(sk);
  +   }
  +
  +   return err;
   }
 
 This all belongs in net/sunrpc/xprtsock.c. The NFS code has no business
 screwing around with the internals of the sunrpc transport.

Ok, I'll make a function there, and call that.

   const struct address_space_operations nfs_file_aops = {
  Index: linux-2.6/net/sunrpc/xprtsock.c
  ===
  --- linux-2.6.orig/net/sunrpc/xprtsock.c
  +++ linux-2.6/net/sunrpc/xprtsock.c
  @@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void *
   {
  struct rpc_xprt *xprt = (struct rpc_xprt *) args;
  struct socket *sock = xprt-sock;
  +   unsigned long pflags = current-flags;
  int err, status = -EIO;
   
  if (xprt-shutdown || xprt-addr.sin_port == 0)
  @@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void *
   
  dprintk(RPC:  xs_udp_connect_worker for xprt %p\n, xprt);
   
  +   if (xprt-swapper)
  +   current-flags |= PF_MEMALLOC;
  +
  /* Start by resetting any existing state */
  xs_close(xprt);
   
  @@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void *
  xprt-sock = sock;
  xprt-inet = sk;
   
  +   if (xprt-swapper)
  +   sk_set_vmio(sk);
  +
  write_unlock_bh(sk-sk_callback_lock);
  }
  xs_udp_do_set_buffer_size(xprt);
  @@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void *
   out:
  xprt_wake_pending_tasks(xprt, status);
  xprt_clear_connecting(xprt);
  +   current-flags = pflags;
   }
   
   /*
  @@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void *
   {
  struct rpc_xprt *xprt = (struct rpc_xprt *)args;
  struct socket *sock = xprt-sock;
  +   unsigned long pflags = current-flags;
  int err, status = -EIO;
   
  if (xprt-shutdown || xprt-addr.sin_port == 0)
  goto out;
   
  +   if (xprt-swapper)
  +   current-flags |= PF_MEMALLOC;
  +
  dprintk(RPC:  xs_tcp_connect_worker for xprt %p\n, xprt);
   
  if (!xprt-sock) {
  @@ -1170,10 +1182,14 @@ static void xs_tcp_connect_worker(void *
  break;
  }
  }
  +
  +   if (xprt-swapper)
  +   sk_set_vmio(xprt-inet);
   out:
  xprt_wake_pending_tasks(xprt, status);
   out_clear:
  xprt_clear_connecting(xprt);
  +   current-flags = pflags;
   }
 
 How does this guarantee that the socket reconnection won't fail?

I was afraid this might not be enough, I really have to go through the
network code.

 Also, what about the case of rpc_malloc()? Can't that cause rpciod to
 deadlock when you add NFS swap into the equation?

I will have to plead ignorance for now, I'll look into this on monday.
On first glance it looks like rpc_malloc could use an |__GFP_EMERG for
RPC_TASK_SWAPPER.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/4] nfs: deadlock prevention for NFS

2006-08-25 Thread Trond Myklebust

Grumble... If your patches are targetting NFS, could you please at the
very least Cc [EMAIL PROTECTED] and/or myself.


On Fri, 2006-08-25 at 17:40 +0200, Peter Zijlstra wrote:
 Provide a proper a_ops->swapfile() implementation for NFS. This will
 set the NFS socket to SOCK_VMIO and put the socket reconnection under
 PF_MEMALLOC (I hope this is enough, otherwise more work needs to be done).
 
 Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
 ---
  fs/nfs/file.c   |   21 -
  include/linux/sunrpc/xprt.h |4 +++-
  net/sunrpc/xprtsock.c   |   16 
  3 files changed, 39 insertions(+), 2 deletions(-)
 
 Index: linux-2.6/fs/nfs/file.c
 ===
 --- linux-2.6.orig/fs/nfs/file.c
 +++ linux-2.6/fs/nfs/file.c
 @@ -27,6 +27,7 @@
  #include linux/slab.h
  #include linux/pagemap.h
  #include linux/smp_lock.h
 +#include net/sock.h
  
  #include asm/uaccess.h
  #include asm/system.h
 @@ -317,7 +318,25 @@ static int nfs_release_page(struct page 
  
  static int nfs_swapfile(struct address_space *mapping, int enable)
  {
 - return 0;
 + int err = -EINVAL;
 + struct rpc_clnt *client = NFS_CLIENT(mapping-host);
 + struct sock *sk = client-cl_xprt-inet;
 +
 + if (enable) {
 + client-cl_xprt-swapper = 1;
 + /*
 +  * keep one extra sock reference so the reserve won't dip
 +  * when the socket gets reconnected.
 +  */
 + sk_adjust_memalloc(1, 1);
 + err = sk_set_vmio(sk);
 + } else if (client-cl_xprt-swapper) {
 + client-cl_xprt-swapper = 0;
 + sk_adjust_memalloc(-1, -1);
 + err = sk_clear_vmio(sk);
 + }
 +
 + return err;
  }

This all belongs in net/sunrpc/xprtsock.c. The NFS code has no business
screwing around with the internals of the sunrpc transport.

  const struct address_space_operations nfs_file_aops = {
 Index: linux-2.6/net/sunrpc/xprtsock.c
 ===
 --- linux-2.6.orig/net/sunrpc/xprtsock.c
 +++ linux-2.6/net/sunrpc/xprtsock.c
 @@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void *
  {
   struct rpc_xprt *xprt = (struct rpc_xprt *) args;
   struct socket *sock = xprt-sock;
 + unsigned long pflags = current-flags;
   int err, status = -EIO;
  
   if (xprt-shutdown || xprt-addr.sin_port == 0)
 @@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void *
  
   dprintk(RPC:  xs_udp_connect_worker for xprt %p\n, xprt);
  
 + if (xprt-swapper)
 + current-flags |= PF_MEMALLOC;
 +
   /* Start by resetting any existing state */
   xs_close(xprt);
  
 @@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void *
   xprt-sock = sock;
   xprt-inet = sk;
  
 + if (xprt-swapper)
 + sk_set_vmio(sk);
 +
   write_unlock_bh(sk-sk_callback_lock);
   }
   xs_udp_do_set_buffer_size(xprt);
 @@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void *
  out:
   xprt_wake_pending_tasks(xprt, status);
   xprt_clear_connecting(xprt);
 + current-flags = pflags;
  }
  
  /*
 @@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void *
  {
   struct rpc_xprt *xprt = (struct rpc_xprt *)args;
   struct socket *sock = xprt-sock;
 + unsigned long pflags = current-flags;
   int err, status = -EIO;
  
   if (xprt-shutdown || xprt-addr.sin_port == 0)
   goto out;
  
 + if (xprt-swapper)
 + current-flags |= PF_MEMALLOC;
 +
   dprintk(RPC:  xs_tcp_connect_worker for xprt %p\n, xprt);
  
   if (!xprt-sock) {
 @@ -1170,10 +1182,14 @@ static void xs_tcp_connect_worker(void *
   break;
   }
   }
 +
 + if (xprt-swapper)
 + sk_set_vmio(xprt-inet);
  out:
   xprt_wake_pending_tasks(xprt, status);
  out_clear:
   xprt_clear_connecting(xprt);
 + current-flags = pflags;
  }

How does this guarantee that the socket reconnection won't fail?

Also, what about the case of rpc_malloc()? Can't that cause rpciod to
deadlock when you add NFS swap into the equation?

Cheers,
  Trond

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] IPV6 : segmentation offload not set correctly on TCP children

2006-08-25 Thread Stephen Hemminger

TCP over IPV6 would incorrectly inherit the GSO settings.
This would cause kernel to send Tcp Segmentation Offload packets for
IPV6 data to devices that can't handle it. It caused the sky2 driver
to lock http://bugzilla.kernel.org/show_bug.cgi?id=7050
and the e1000 would generate bogus packets. I can't blame the
hardware for gagging if the upper layers feed it garbage.

This was a new bug in 2.6.18 introduced with GSO support.

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]


--- linux-2.6.orig/net/ipv6/tcp_ipv6.c  2006-08-03 09:09:16.0 -0700
+++ linux-2.6/net/ipv6/tcp_ipv6.c   2006-08-25 15:30:31.0 -0700
@@ -944,7 +944,7 @@
 * comment in that function for the gory details. -acme
 */
 
-   sk-sk_gso_type = SKB_GSO_TCPV6;
+   newsk-sk_gso_type = SKB_GSO_TCPV6;
__ip6_dst_store(newsk, dst, NULL);
 
newtcp6sk = (struct tcp6_sock *)newsk;
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] IPV6 : segmentation offload not set correctly on TCP children

2006-08-25 Thread David Miller

From: Stephen Hemminger [EMAIL PROTECTED]
Date: Fri, 25 Aug 2006 15:43:53 -0700

 TCP over IPV6 would incorrectly inherit the GSO settings.
 This would cause kernel to send Tcp Segmentation Offload packets for
 IPV6 data to devices that can't handle it. It caused the sky2 driver
 to lock http://bugzilla.kernel.org/show_bug.cgi?id=7050
 and the e1000 would generate bogus packets. I can't blame the
 hardware for gagging if the upper layers feed it garbage.

 This was a new bug in 2.6.18 introduced with GSO support.

 Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]

Good catch.  Applied, thanks Stephen.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [2.6.17.8] NFS stall / BUG in UDP fragment processing / SKB trimming

2006-08-25 Thread Greg KH

On Sun, Aug 13, 2006 at 10:59:11PM +1000, Herbert Xu wrote:
 On Sat, Aug 12, 2006 at 09:19:19PM +, Nix wrote:
  
  The kernel log showed a heap of BUGs from somewhere inside the skb
  management layer, somewhere in UDP fragment processing while
  handling NFS requests. It starts like this:
  
  Aug 12 21:31:08 hades warning: kernel: BUG: warning at 
  include/linux/skbuff.h:975/__skb_trim()
  Aug 12 21:31:08 hades warning: kernel: c030ed39 
  ip_append_data+0x5b3/0x951  c030fc18 ip_generic_getfrag+0x0/0x96
 
 Oops, I missed this code path when I disallowed skb_trim from operating
 on a paged skb.  This patch should fix the problem.
 
 Greg, we need this for 2.6.17 stable as well if Dave is OK with it.

This patch doesn't apply at all to the latest 2.6.17-stable kernel tree.
Care to rediff it?

thanks,

greg k-h
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] IPV6 : segmentation offload not set correctly on TCP children

2006-08-25 Thread Thomas Glanzmann

Hello Stephen,
thanks for the fix, it fixes the problem for me. I closed the bug. On
which hardware did you reproduce the bug and how did you find it? Did
you use git bisect?

Thomas
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [IPV6] Q: corrupt checksums when transferring data

2006-08-25 Thread Brandeburg, Jesse

Stephen Hemminger wrote:
 I think this is the problem. Does it fix e1000? I am testing now.
 
 TCP over IPV6 would incorrectly inherit the GSO settings on accepted
 children.
 
 --- linux-2.6.orig/net/ipv6/tcp_ipv6.c2006-08-03
09:09:16.0
 -0700 +++ linux-2.6/net/ipv6/tcp_ipv6.c   2006-08-25
15:30:31.0
 -0700 @@ -944,7 +944,7 @@
* comment in that function for the gory details. -acme
*/
 
 - sk-sk_gso_type = SKB_GSO_TCPV6;
 + newsk-sk_gso_type = SKB_GSO_TCPV6;
   __ip6_dst_store(newsk, dst, NULL);
 
   newtcp6sk = (struct tcp6_sock *)newsk;

ah, no more errors, I didn't go through and validate much more past
that. I'm now able to do hardware offloads with no errors.

I think it's a good patch, at least it makes sense to me and works for
me.

Thanks!
 Jesse
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] tcp_bic: use BUILD_BUG_ON

2006-08-25 Thread Stephen Hemminger

Please fix the other variants as well: htcp, cubic, veno, vegas, ...
Just one patch is necessary with all the files.

-- 
Stephen Hemminger [EMAIL PROTECTED]

All non-trivial abstractions, to some degree, are leaky.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] ethtool v4: add autoneg advertise feature

2006-08-25 Thread Bill Fink

On Thu, 24 Aug 2006, Michael Chan wrote:

 Jeff Kirsher wrote:
 
  The old way of setting autonegotiation was using the 
  following command:
  ethtool -s ethx speed 100 duplex full auto on
  now the command would be
  ethtool -s ethx auto on advertise 0x08
  both commands would result in only advertising 100 FULL.
  
  There still needs to be a change made to the man file to reflect the
  change in the behavior of ethtool, which I have not done.  But this
  patch will allow for greater flexibility in setting autonegotiation
  speeds.
 
 It is more flexible, but less intuitive.  The user now has to
 remember hex values instead of the more intuitive speed and
 duplex.  Perhaps we can keep the old method of using speed and
 duplex, while adding the new method of specifying hex values? 

I agree.  Something like:

ethtool -s ethx auto on advertise mode1+mode2+...+moden

For example:

ethtool -s ethx auto on advertise 100-half+100-full

to set speed 100 either half or full duplex.

Maybe have some abbreviations such as 100-all (same as above) or
all-half (for all supported half duplex) or just all (for all supported
modes), which I suppose is the default.

Just an idea.

-Bill
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH -rt DO NOT APPLY] Fix for tg3 networking lockup

2006-08-25 Thread Marc Bevand


On 8/25/06, Michael Chan [EMAIL PROTECTED] wrote:


The reason is that TSO on 5704 and older chips is done by firmware.  ASF
is also implemented by firmware.  If ASF is enabled, there is no room to
do TSO and ASF at the same time.


Just for testing purposes, I have applied the following patch to my tg3.c. I now
seem to be able to enable/disable TSO, but I admit I don't know whether ASF
is still functional or not.

   else if (GET_ASIC_REV(tp-pci_chip_rev_id) == ASIC_REV_5700 ||
   GET_ASIC_REV(tp-pci_chip_rev_id) == ASIC_REV_5701 ||
-   tp-pci_chip_rev_id == CHIPREV_ID_5705_A0 ||
-   (tp-tg3_flags  TG3_FLAG_ENABLE_ASF) != 0) {
+   tp-pci_chip_rev_id == CHIPREV_ID_5705_A0) {
   tp-tg3_flags2 = ~TG3_FLG2_TSO_CAPABLE;
   } else {

Then tg3 considered my interface as TSO-capable (TSOcap[1] in dmesg).
TSO was still disabled by default, which is normal because there is this other
check a couple of lines below:

   /* TSO is on by default on chips that support hardware TSO.
* Firmware TSO on older chips gives lower performance, so it
* is off by default, but can be enabled using ethtool.
*/
   if (tp-tg3_flags2  TG3_FLG2_HW_TSO)
   dev-features |= NETIF_F_TSO;

But I was able to turn TSO on via ethtool -K. This is exactly the behavior I
would like to see in tg3. So are you saying the patch I applied actually
breaks ASF?


Firmware-based TSO is actually slower than no TSO.  The only benefit is
a little better CPU utilization.


I know, in one of my test-cases, firmware TSO reduces the max achievable
TCP bandwidth from 930 to 840 Mbit/s on a GigE network while reducing
the CPU utilization from 44% to 22%. I think firmware TSO still makes sense
in some cases.

--
Marc Bevand
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] Congestion control (modulo lp, bic): use BUILD_BUG_ON

2006-08-25 Thread Alexey Dobriyan

Signed-off-by: Alexey Dobriyan [EMAIL PROTECTED]
---

 net/ipv4/tcp_cubic.c |2 +-
 net/ipv4/tcp_highspeed.c |2 +-
 net/ipv4/tcp_htcp.c  |2 +-
 net/ipv4/tcp_hybla.c |2 +-
 net/ipv4/tcp_vegas.c |2 +-
 net/ipv4/tcp_veno.c  |2 +-
 net/ipv4/tcp_westwood.c  |2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -358,7 +358,7 @@ static struct tcp_congestion_ops cubictc
 
 static int __init cubictcp_register(void)
 {
-   BUG_ON(sizeof(struct bictcp)  ICSK_CA_PRIV_SIZE);
+   BUILD_BUG_ON(sizeof(struct bictcp)  ICSK_CA_PRIV_SIZE);
 
/* Precompute a bunch of the scaling factors that are used per-packet
 * based on SRTT of 100ms
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -189,7 +189,7 @@ static struct tcp_congestion_ops tcp_hig
 
 static int __init hstcp_register(void)
 {
-   BUG_ON(sizeof(struct hstcp)  ICSK_CA_PRIV_SIZE);
+   BUILD_BUG_ON(sizeof(struct hstcp)  ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(tcp_highspeed);
 }
 
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -286,7 +286,7 @@ static struct tcp_congestion_ops htcp = 
 
 static int __init htcp_register(void)
 {
-   BUG_ON(sizeof(struct htcp)  ICSK_CA_PRIV_SIZE);
+   BUILD_BUG_ON(sizeof(struct htcp)  ICSK_CA_PRIV_SIZE);
BUILD_BUG_ON(BETA_MIN = BETA_MAX);
return tcp_register_congestion_control(htcp);
 }
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -170,7 +170,7 @@ static struct tcp_congestion_ops tcp_hyb
 
 static int __init hybla_register(void)
 {
-   BUG_ON(sizeof(struct hybla)  ICSK_CA_PRIV_SIZE);
+   BUILD_BUG_ON(sizeof(struct hybla)  ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(tcp_hybla);
 }
 
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -370,7 +370,7 @@ static struct tcp_congestion_ops tcp_veg
 
 static int __init tcp_vegas_register(void)
 {
-   BUG_ON(sizeof(struct vegas)  ICSK_CA_PRIV_SIZE);
+   BUILD_BUG_ON(sizeof(struct vegas)  ICSK_CA_PRIV_SIZE);
tcp_register_congestion_control(tcp_vegas);
return 0;
 }
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -213,7 +213,7 @@ static struct tcp_congestion_ops tcp_ven
 
 static int __init tcp_veno_register(void)
 {
-   BUG_ON(sizeof(struct veno)  ICSK_CA_PRIV_SIZE);
+   BUILD_BUG_ON(sizeof(struct veno)  ICSK_CA_PRIV_SIZE);
tcp_register_congestion_control(tcp_veno);
return 0;
 }
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -289,7 +289,7 @@ static struct tcp_congestion_ops tcp_wes
 
 static int __init tcp_westwood_register(void)
 {
-   BUG_ON(sizeof(struct westwood)  ICSK_CA_PRIV_SIZE);
+   BUILD_BUG_ON(sizeof(struct westwood)  ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(tcp_westwood);
 }
 

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

1 2 >

1 - 100 of 122 matches

Mail list logo