patch for caching TCP options with syncookies
greetings! this is my first creation of a patch for the linux kernel. if you have time, could you please take a look at it and give me some feedback. this patch creates a syn_cache for caching TCP options when syn_cookies are in use (by default, all TCP options are lost when using syncookies). any feedback on the implementation of this cache would also be appreciated. if anybody's interested, i have also written a paper on this project. jensen diff -Naur linux-2.6.11.11/include/net/tcp.h linux_new-2.6.11.11/include/net/tcp.h --- linux-2.6.11.11/include/net/tcp.h 2005-05-27 05:06:46.0 + +++ linux_new-2.6.11.11/include/net/tcp.h 2006-03-15 07:21:39.0 + @@ -669,6 +669,32 @@ } af; }; +/* added struct for caching syn_options */ +struct syn_opt { + struct hlist_node hentry; + __u32 isn_key; + unsigned long expires; + __u8snd_wscale : 4, + tstamp_ok : 1, + sack_ok : 1, + wscale_ok : 1; +}; + +struct syn_hash_bucket { + rwlock_t lock; + struct hlist_head chain; + __u8size; +}; + +extern struct syn_hash_bucket *syn_hasht; +extern struct timer_list synhashtimer; + +/* + * change these values to increase (or decrease) the SYNHASH size + */ +#define SYNHASH_SIZE 512 +#define SYNHASH_BUCKET 30 + /* SLAB cache for open requests. */ extern kmem_cache_t *tcp_openreq_cachep; @@ -681,6 +707,12 @@ tcp_openreq_fastfree(req); } +/* SLAB cache for syn_opt. 
*/ +extern kmem_cache_t *syn_opt_cachep; + +#define syn_opt_alloc() kmem_cache_alloc(syn_opt_cachep, SLAB_ATOMIC) +#define syn_opt_fastfree(syn_req) kmem_cache_free(syn_opt_cachep, syn_req) + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #define TCP_INET_FAMILY(fam) ((fam) == AF_INET) #else diff -Naur linux-2.6.11.11/net/ipv4/syncookies.c linux_new-2.6.11.11/net/ipv4/syncookies.c --- linux-2.6.11.11/net/ipv4/syncookies.c 2005-05-27 05:06:46.0 + +++ linux_new-2.6.11.11/net/ipv4/syncookies.c 2006-03-15 07:22:34.0 + @@ -19,6 +19,7 @@ #include linux/random.h #include linux/kernel.h #include net/tcp.h +#include linux/list.h extern int sysctl_tcp_syncookies; @@ -121,6 +122,9 @@ int mss; struct rtable *rt; __u8 rcv_wscale; + struct syn_opt *tmp, *found; + struct hlist_node *pos; + int n; // key for hash table if (!sysctl_tcp_syncookies || !skb-h.th-ack) goto out; @@ -162,11 +166,38 @@ } } - req-snd_wscale = req-rcv_wscale = req-tstamp_ok = 0; - req-wscale_ok = req-sack_ok = 0; + /* look up cached syn options in hash table */ + n = cookie % SYNHASH_SIZE; + read_lock(syn_hasht[n].lock); + hlist_for_each_entry(tmp, pos, syn_hasht[n].chain, hentry) { + if (cookie == tmp-isn_key) { + if (!(time_after(jiffies, tmp-expires))) { + found = tmp; + break; + } + // FOUND COOKIE, BUT EXPIRED + else { + found = NULL; + break; + } + } + } + read_unlock(syn_hasht[n].lock); + + /* must check if found exists. may have expired */ + if (found) { + req-snd_wscale = found-snd_wscale; + req-tstamp_ok = found-tstamp_ok; + req-wscale_ok = found-wscale_ok; + req-sack_ok= found-sack_ok; + } + else { + req-snd_wscale = req-rcv_wscale = req-tstamp_ok = 0; + req-wscale_ok = req-sack_ok = 0; + } req-expires= 0UL; req-retrans= 0; - + /* * We need to lookup the route here to get at the correct * window size. 
We should better make sure that the window size @@ -194,8 +225,10 @@ req-window_clamp = dst_metric(rt-u.dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req-mss, req-rcv_wnd, req-window_clamp, - 0, rcv_wscale); + req-wscale_ok, rcv_wscale); + /* BTW win scale with syncookies is 0 by definition */ + /* this is not true with syn_cache */ req-rcv_wscale = rcv_wscale; ret = get_cookie_sock(sk, skb, req, rt-u.dst); diff -Naur linux-2.6.11.11/net/ipv4/tcp.c linux_new-2.6.11.11/net/ipv4/tcp.c --- linux-2.6.11.11/net/ipv4/tcp.c 2005-05-27 05:06:46.0 + +++ linux_new-2.6.11.11/net/ipv4/tcp.c 2006-03-15 07:21:59.0 + @@ -257,6 +257,7 @@ #include linux/fs.h #include linux/random.h #include linux/bootmem.h +#include linux/list.h #include net/icmp.h #include net/tcp.h @@ -272,9 +273,13 @@
Re: patch for caching TCP options with syncookies
On 3/15/06, jensen galan [EMAIL PROTECTED] wrote: greetings! this is my first creation of a patch for the linux kernel. if you have time, could you please take a look at it and give me some feedback. this patch creates a syn_cache for caching TCP options when syn_cookies are in use (by default, all TCP options are lost when using syncookies). any feedback on the implementation of this cache would also be appreciated. Interesting, but... if anybody's interested, i have also written a paper on this project. jensen diff -Naur linux-2.6.11.11/include/net/tcp.h linux_new-2.6.11.11/include/net/tcp.h ... can you please update your patch to the latest kernel tree? Preferably David Miller's net-2.6.17 git tree, available at www.kernel.org/git. You'll notice some differences :-) Then repost and we can continue the discussion as I'll probably do some work in this area to support DCCP's Init Cookies. Thanks, - Arnaldo - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
MR. Kings Roger
MR. Kings Roger Standard Bank Plc Johannesburg, South Africa ID Number: 0092-042 UNTERSTÜTZUNG Dringend GESCHAEFTSANGEBOT Mein Name ist Kings Roger und ich bin der Leiter des Corporate Affairs Committee in der Standard Bank of South-Africa PLC in Südafrika. Zur Zeit halte ich mich in Deutschland zu einer Fortbildung auf. Ich kontaktiere Sie bezüglich des Transfers einer sehr großen Summe Geldes vom Konto eines Verstorbenen. Ich weiß, daß eine Transaktion dieser Größenordnung zunächst bei jedem Besorgnis erregen wird und versichere ich Ihnen, daß sich um alles gekümmert wird.Aufgrund der Dringlichkeit der Angelegenheit habe ich mich entschlossen, Sie zu kontaktieren. Es geht um folgendes: Einer meiner Kollegen ist für das Konto von Gerald Erbes zuständig, welcher im Oktober bei einem Flugzeugabsturz ums Leben kam. Er befand sich gemeinsam mit anderen Passagieren an Bord einer Egyptian Airline 990. Seit diesem Vorfall ist niemand seiner nächsten Verwandten mehr am Leben, der als sein Erbe Ansprüche auf das Guthaben auf seinem Konto erheben könnte. Wir können jedoch gemäß unserer Richtlinien das Geld nicht auszahlen, bevor jemand als Angehöriger und Erbe auftritt und seinen Anspruch geltend macht. Aufgrund dieser Entdeckung und der Übereinstimmung Ihres Namens mit dem des Verstorbenen bitten meine Kollegen und ich Sie nun um Ihre Erlaubnis, Sie als nächsten Angehörigen des Verstorbenen anzugeben. Die gesamte Abwicklung und Dokumentation wird sorgfältig von mir durchgeführt, damit das Guthaben von 26 Millionen US$ an Sie als nächsten Angehörigen ausgezahlt werden kann. Andernfalls wird die gesamte Summe nach fünf Jahren in das Eigentum der Bank übergehen und die Direktoren der Bank werden sie untereinander aufteilen. Aufgrund dieser Tatsache habe ich mich entschlossen, mich an Sie zu wenden, damit Sie als Erbe auftreten können und nicht alles den Direktoren zugute kommt. 
Da aber die Person, die im Testament als Erbin genannt wird, mit ihm gemeinsam verstorben ist, haben wir vom Nachlaßverwalter den Auftrag bekommen, ein Familienmitglied des Verstorbenen ausfindig zu machen, daß das Erbe antreten kann. Wir bitten Sie, unseren Vorschlag anzunehmen und versichern Ihnen, daß alles absolut risikofrei für Sie ablaufen wird. Wir werden Sie mit 6,5 Millionen US$ an der Transaktion beteiligen, den restlichen Betrag werden meine Kollegen und ich für. Falls Sie interessiert sind, schicken Sie mir bitte folgende Angaben: 1. Name/Firmen Name um die erforderlichen Dokumente vorzubereiten 2. Persönliche Telefon- und Fax-Nummern zu meiner privaten Email : [EMAIL PROTECTED], damit ich die weiteren relevanten Details in dieser Sache zu Ihnen mitteilen kann. Vielen Dank im Voraus. Wir bitten sie eindringlich, diese Angelegenheit vertraulich zu behandeln. Bitte antworten Sie mir schnellstmöglich und Gott segne sie. Mit freundlichen Gruessen, Mr. Kings Roger STANDARD BANK PLC - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
MR. Kings Roger
MR. Kings Roger Standard Bank Plc Johannesburg, South Africa ID Number: 0092-042 UNTERSTÜTZUNG Dringend GESCHAEFTSANGEBOT Mein Name ist Kings Roger und ich bin der Leiter des Corporate Affairs Committee in der Standard Bank of South-Africa PLC in Südafrika. Zur Zeit halte ich mich in Deutschland zu einer Fortbildung auf. Ich kontaktiere Sie bezüglich des Transfers einer sehr großen Summe Geldes vom Konto eines Verstorbenen. Ich weiß, daß eine Transaktion dieser Größenordnung zunächst bei jedem Besorgnis erregen wird und versichere ich Ihnen, daß sich um alles gekümmert wird.Aufgrund der Dringlichkeit der Angelegenheit habe ich mich entschlossen, Sie zu kontaktieren. Es geht um folgendes: Einer meiner Kollegen ist für das Konto von Gerald Erbes zuständig, welcher im Oktober bei einem Flugzeugabsturz ums Leben kam. Er befand sich gemeinsam mit anderen Passagieren an Bord einer Egyptian Airline 990. Seit diesem Vorfall ist niemand seiner nächsten Verwandten mehr am Leben, der als sein Erbe Ansprüche auf das Guthaben auf seinem Konto erheben könnte. Wir können jedoch gemäß unserer Richtlinien das Geld nicht auszahlen, bevor jemand als Angehöriger und Erbe auftritt und seinen Anspruch geltend macht. Aufgrund dieser Entdeckung und der Übereinstimmung Ihres Namens mit dem des Verstorbenen bitten meine Kollegen und ich Sie nun um Ihre Erlaubnis, Sie als nächsten Angehörigen des Verstorbenen anzugeben. Die gesamte Abwicklung und Dokumentation wird sorgfältig von mir durchgeführt, damit das Guthaben von 26 Millionen US$ an Sie als nächsten Angehörigen ausgezahlt werden kann. Andernfalls wird die gesamte Summe nach fünf Jahren in das Eigentum der Bank übergehen und die Direktoren der Bank werden sie untereinander aufteilen. Aufgrund dieser Tatsache habe ich mich entschlossen, mich an Sie zu wenden, damit Sie als Erbe auftreten können und nicht alles den Direktoren zugute kommt. 
Da aber die Person, die im Testament als Erbin genannt wird, mit ihm gemeinsam verstorben ist, haben wir vom Nachlaßverwalter den Auftrag bekommen, ein Familienmitglied des Verstorbenen ausfindig zu machen, daß das Erbe antreten kann. Wir bitten Sie, unseren Vorschlag anzunehmen und versichern Ihnen, daß alles absolut risikofrei für Sie ablaufen wird. Wir werden Sie mit 6,5 Millionen US$ an der Transaktion beteiligen, den restlichen Betrag werden meine Kollegen und ich für. Falls Sie interessiert sind, schicken Sie mir bitte folgende Angaben: 1. Name/Firmen Name um die erforderlichen Dokumente vorzubereiten 2. Persönliche Telefon- und Fax-Nummern zu meiner privaten Email : [EMAIL PROTECTED], damit ich die weiteren relevanten Details in dieser Sache zu Ihnen mitteilen kann. Vielen Dank im Voraus. Wir bitten sie eindringlich, diese Angelegenheit vertraulich zu behandeln. Bitte antworten Sie mir schnellstmöglich und Gott segne sie. Mit freundlichen Gruessen, Mr. Kings Roger STANDARD BANK PLC - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[2.6 patch] ieee80211_wx.c: remove dead code
Since sec->key_sizes[] is an u8, len can't be < 0. Spotted by the Coverity checker. Signed-off-by: Adrian Bunk [EMAIL PROTECTED] --- linux-2.6.16-rc6-mm1-full/net/ieee80211/ieee80211_wx.c.old 2006-03-14 03:01:43.0 +0100 +++ linux-2.6.16-rc6-mm1-full/net/ieee80211/ieee80211_wx.c 2006-03-14 03:02:02.0 +0100 @@ -505,7 +505,7 @@ int ieee80211_wx_get_encode(struct ieee8 len = sec->key_sizes[key]; memcpy(keybuf, sec->keys[key], len); - erq->length = (len >= 0 ? len : 0); + erq->length = len; erq->flags |= IW_ENCODE_ENABLED; if (ieee->open_wep) - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH 3/13] d80211: non-shared interface types
On Mon, Mar 06, 2006 at 04:44:23PM +0100, Jiri Benc wrote: This patch removes iwmode variable (local-conf.mode) shared by all interfaces. Instead, every interface has its own type (STA/IBSS/AP/WDS). Index: dscape/include/net/d80211.h === --- dscape.orig/include/net/d80211.h 2006-03-06 13:33:02.0 +0100 +++ dscape/include/net/d80211.h 2006-03-06 14:10:07.0 +0100 @@ -242,8 +242,6 @@ struct ieee80211_conf { int freq; /* MHz */ int channel_val;/* hw specific value for the channel */ - int mode; /* IW_MODE_ */ - This breaks bcm43xx-d80211 build. Do you happen to have a patch to fix it? At least the experimental branch of your dscape.git seemed to show the same issue: CC [M] drivers/net/wireless/bcm43xx-d80211/bcm43xx_main.o drivers/net/wireless/bcm43xx-d80211/bcm43xx_main.c: In function ‘bcm43xx_net_config’: drivers/net/wireless/bcm43xx-d80211/bcm43xx_main.c:4412: error: ‘struct ieee80211_conf’ has no member named ‘mode’ drivers/net/wireless/bcm43xx-d80211/bcm43xx_main.c:4413: error: ‘struct ieee80211_conf’ has no member named ‘mode’ The same issue showed up with our low-level driver. How was the low-level driver supposed to get this information with this change? d80211 part is likely fine, since it has the interface type available like you mentioned, but I don't think that this is available to low-level drivers(?). In other words, keeping this int mode in struct ieee80211_conf may be useful even if we change d80211 to do something else internally. Which driver did you use to test these changes? I have now successfully tested both client and AP modes with bcm43xx_d80211 and Devicescape driver for Atheros cards prior to applying these 13 changes. I would like to verify that the new changes work properly before they are applied into the wireless-2.6 tree. I can change our code easily, but I would rather not touch bcm43xx_d80211 code to avoid any problems in keeping the reverse engineered implementation clean as far as use of proprietary information is concerned. 
-- Jouni Malinen PGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH 3/13] d80211: non-shared interface types
On Wed, 15 Mar 2006 09:40:52 -0800, Jouni Malinen wrote: This breaks bcm43xx-d80211 build. Do you happen to have a patch to fix it? Yes, I do. Sorry for not posting it. This is a first part; it's just ugly and quick (but working) fix. Index: dscape/drivers/net/wireless/bcm43xx-d80211/bcm43xx.h === --- dscape.orig/drivers/net/wireless/bcm43xx-d80211/bcm43xx.h 2006-03-06 15:37:20.0 +0100 +++ dscape/drivers/net/wireless/bcm43xx-d80211/bcm43xx.h2006-03-15 17:29:08.0 +0100 @@ -720,6 +720,7 @@ struct bcm43xx_private { /* Informational stuff. */ char nick[IW_ESSID_MAX_SIZE + 1]; u8 bssid[ETH_ALEN]; + int interfaces; /* encryption/decryption */ u16 security_offset; Index: dscape/drivers/net/wireless/bcm43xx-d80211/bcm43xx_main.c === --- dscape.orig/drivers/net/wireless/bcm43xx-d80211/bcm43xx_main.c 2006-03-06 15:37:20.0 +0100 +++ dscape/drivers/net/wireless/bcm43xx-d80211/bcm43xx_main.c 2006-03-15 17:29:08.0 +0100 @@ -4409,9 +4409,6 @@ static int bcm43xx_net_config(struct net if (conf-channel != radio-channel) bcm43xx_radio_selectchannel(bcm, conf-channel, 0); - if (conf-mode != bcm-iw_mode) - bcm43xx_set_iwmode(bcm, conf-mode); - if (conf-short_slot_time != bcm-short_slot) { assert(phy-type == BCM43xx_PHYTYPE_G); if (conf-short_slot_time) @@ -4578,6 +4575,33 @@ static int bcm43xx_net_stop(struct net_d return 0; } +static int bcm43xx_add_interface(struct net_device *net_dev, +struct ieee80211_if_conf *conf) +{ + struct bcm43xx_private *bcm = bcm43xx_priv(net_dev); + + if (bcm-interfaces 0) + return -ENOBUFS; + if (memcmp(bcm-net_dev-dev_addr, conf-mac_addr, ETH_ALEN) != 0) + return -EADDRNOTAVAIL; + if (conf-type == IEEE80211_SUB_IF_TYPE_STA) + bcm-iw_mode = IW_MODE_INFRA; + else if (conf-type == IEEE80211_SUB_IF_TYPE_IBSS) + bcm-iw_mode = IW_MODE_ADHOC; + else + return -EOPNOTSUPP; + bcm-interfaces++; + return 0; +} + +static void bcm43xx_remove_interface(struct net_device *net_dev, +struct ieee80211_if_conf *conf) +{ + struct bcm43xx_private *bcm = bcm43xx_priv(net_dev); 
+ + bcm-interfaces--; +} + /* Initialization of struct net_device, just after allocation. */ static void bcm43xx_netdev_setup(struct net_device *net_dev) { @@ -4659,6 +4683,8 @@ static int __devinit bcm43xx_init_one(st ieee-tx = bcm43xx_net_hard_start_xmit; ieee-open = bcm43xx_net_open; ieee-stop = bcm43xx_net_stop; + ieee-add_interface = bcm43xx_add_interface; + ieee-remove_interface = bcm43xx_remove_interface; ieee-reset = bcm43xx_net_reset; ieee-config = bcm43xx_net_config; //TODO ieee-set_key = bcm43xx_net_set_key; -- Jiri Benc SUSE Labs - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH 1/13] d80211: allow WDS remote to be set by WE
On Mon, Mar 06, 2006 at 04:44:21PM +0100, Jiri Benc wrote: Setting of address of WDS remote peer wasn't possible by a WE call. Remote WDS peer can be understood as a remote AP and SIOCSIWAP/SIOCGIWAP are unused in WDS mode, so let's use them. This sounds good, but I was unable to get this working. I created a WDS link with initial peer address 00:01:02:03:04:05. This added the netdev and STA entry correctly. However, when I run iwconfig wds0 ap 00:11:22:33:44:55, I do not see any change in either the WDS data (/proc/net/ieee80211/wlan0/iface/wds0 did not show change in wds.peer) or STA info (00:11:22:33:44:55 was not added in /proc/net/ieee80211/wlan0/sta directory). Did I understand something incorrectly here? -- Jouni Malinen PGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH 3/13] d80211: non-shared interface types
On Wed, 15 Mar 2006 09:40:52 -0800, Jouni Malinen wrote: The same issue showed up with our low-level driver. How was the low-level driver supposed to get this information with this change? From struct ieee80211_if_conf in add_interface callback. d80211 part is likely fine, since it has the interface type available like you mentioned, but I don't think that this is available to low-level drivers(?). In other words, keeping this int mode in struct ieee80211_conf may be useful even if we change d80211 to do something else internally. If you have two interfaces running, one of STA type and of AP type, what should be the value of such global mode variable? It makes more sense to have per-interface mode and let driver decide what should be the mode it tells to the hardware. Which driver did you use to test these changes? I have now successfully tested both client and AP modes with bcm43xx_d80211 and Devicescape driver for Atheros cards prior to applying these 13 changes. I would like to verify that the new changes work properly before they are applied into the wireless-2.6 tree. I can change our code easily, but I would rather not touch bcm43xx_d80211 code to avoid any problems in keeping the reverse engineered implementation clean as far as use of proprietary information is concerned. Unfortunately, I'm not able to test AP mode as it's not supported by any available driver yet. For STA mode I use bcm43xx drivers. Thanks, -- Jiri Benc SUSE Labs - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH 3/13] d80211: non-shared interface types
On Wed, Mar 15, 2006 at 06:47:40PM +0100, Jiri Benc wrote: On Wed, 15 Mar 2006 09:40:52 -0800, Jouni Malinen wrote: This breaks bcm43xx-d80211 build. Do you happen to have a patch to fix it? Yes, I do. Sorry for not posting it. This is a first part; it's just ugly and quick (but working) fix. Thanks! I'll try with this. In addition to fixing bcm43xx build, this was enough to remind me how the mode parameter is now available to low-level driver, so I'll test a similar change with our Atheros driver. One more driver is failing, though: drivers/net/wireless/rt2x00/rt2400pci.c: In function ‘rt2400pci_rxdone’: drivers/net/wireless/rt2x00/rt2400pci.c:756: warning: implicit declaration of function ‘ieee80211_rx’ drivers/net/wireless/rt2x00/rt2400pci.c: In function ‘rt2400pci_config_update’: drivers/net/wireless/rt2x00/rt2400pci.c:1468: error: ‘struct ieee80211_conf’ has no member named ‘mode’ drivers/net/wireless/rt2x00/rt2400pci.c: In function ‘rt2400pci_reset_tsf’: drivers/net/wireless/rt2x00/rt2400pci.c:1724: error: ‘struct ieee80211_conf’ has no member named ‘mode’ drivers/net/wireless/rt2x00/rt2400pci.c:1726: error: ‘struct ieee80211_conf’ has no member named ‘mode’ drivers/net/wireless/rt2x00/rt2400pci.c: In function ‘rt2400pci_init_hw’: drivers/net/wireless/rt2x00/rt2400pci.c:1972: error: ‘struct ieee80211_hw’ has no member named ‘set_mac_address’ I don't have rt2400 card in my current testbed (but should probably add one; I'm pretty sure I have such a card somewhere), so this is not critical for my current tests. Anyway, this will also need to be fixed before the d80211 changes can be merged into wireless-2.6. -- Jouni MalinenPGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH 3/13] d80211: non-shared interface types
On Wed, Mar 15, 2006 at 06:59:53PM +0100, Jiri Benc wrote: On Wed, 15 Mar 2006 09:40:52 -0800, Jouni Malinen wrote: The same issue showed up with our low-level driver. How was the low-level driver supposed to get this information with this change? From struct ieee80211_if_conf in add_interface callback. This was the part I had already forgotten about.. If you have two interfaces running, one of STA type and of AP type, what should be the value of such global mode variable? It makes more sense to have per-interface mode and let driver decide what should be the mode it tells to the hardware. Agreed. The proposed change is a much better way of doing this. Unfortunately, I'm not able to test AP mode as it's not supported by any available driver yet. For STA mode I use bcm43xx drivers. OK. I tested bcm43xx_dscape in AP mode yesterday and, to my surprise, it was actually almost working. It was not sending beacon frames, but once I convinced my client to not care about this, I was able to successfully associate in WPA-PSK/TKIP mode. Anyway, since I haven't yet got to the point of merging in needed changes for hostapd, that would be the next showstopper in allowing you to test this easily.. In other words, until I get that done, I better be prepared to use time on testing all the AP mode cases myself ;-). -- Jouni Malinen PGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH 1/13] d80211: allow WDS remote to be set by WE
On Wed, 15 Mar 2006 09:52:26 -0800, Jouni Malinen wrote: This sounds good, but I was unable to get this working. I created a WDS link with initial peer address 00:01:02:03:04:05. This added the netdev and STA entry correctly. However, when I run iwconfig wds0 ap 00:11:22:33:44:55, I do not see any change in either the WDS data (/proc/net/ieee80211/wlan0/iface/wds0 did not show change in wds.peer) or STA info (00:11:22:33:44:55 was not added in /proc/net/ieee80211/wlan0/sta directory). Did I understand something incorrectly here? Interesting. It shouldn't be in any way different than using PRISM2_HOSTAPD_UPDATE_IF ioctl. I can't find any problem looking at the patch and I'm unable to try it now - I have to find out why my notebook freezes first :-/ -- Jiri Benc SUSE Labs - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH 3/13] d80211: non-shared interface types
On Wednesday 15 March 2006 19:02, Jouni Malinen wrote: On Wed, Mar 15, 2006 at 06:47:40PM +0100, Jiri Benc wrote: On Wed, 15 Mar 2006 09:40:52 -0800, Jouni Malinen wrote: This breaks bcm43xx-d80211 build. Do you happen to have a patch to fix it? Yes, I do. Sorry for not posting it. This is a first part; it's just ugly and quick (but working) fix. Thanks! I'll try with this. In addition to fixing bcm43xx build, this was enough to remind me how the mode parameter is now available to low-level driver, so I'll test a similar change with our Atheros driver. One more driver is failing, though: drivers/net/wireless/rt2x00/rt2400pci.c: In function ‘rt2400pci_rxdone’: drivers/net/wireless/rt2x00/rt2400pci.c:756: warning: implicit declaration of function ‘ieee80211_rx’ drivers/net/wireless/rt2x00/rt2400pci.c: In function ‘rt2400pci_config_update’: drivers/net/wireless/rt2x00/rt2400pci.c:1468: error: ‘struct ieee80211_conf’ has no member named ‘mode’ drivers/net/wireless/rt2x00/rt2400pci.c: In function ‘rt2400pci_reset_tsf’: drivers/net/wireless/rt2x00/rt2400pci.c:1724: error: ‘struct ieee80211_conf’ has no member named ‘mode’ drivers/net/wireless/rt2x00/rt2400pci.c:1726: error: ‘struct ieee80211_conf’ has no member named ‘mode’ drivers/net/wireless/rt2x00/rt2400pci.c: In function ‘rt2400pci_init_hw’: drivers/net/wireless/rt2x00/rt2400pci.c:1972: error: ‘struct ieee80211_hw’ has no member named ‘set_mac_address’ I don't have rt2400 card in my current testbed (but should probably add one; I'm pretty sure I have such a card somewhere), so this is not critical for my current tests. Anyway, this will also need to be fixed before the d80211 changes can be merged into wireless-2.6. For the ieee80211_rx error a patch was already sent to the list, but there were some problems with the patch so it was resent a couple of weeks later, but has not yet been committed to git. 
I can make a patch for the mode and set_mac_address changes this weekend, and either send it separately, or make it part of the (already quite large) patch series that would update the in-kernel rt2x00 to our latest CVS version. IvD pgpVsOo1TFbgf.pgp Description: PGP signature
Re: [PATCH netdev-2.6 ] e100: Fix eeh on pseries during ethtool -t
On 3/11/06, Jeff Garzik [EMAIL PROTECTED] wrote: Jeff Kirsher wrote: Jeff - should a patch be made for 2.6.16 also? Yes, since this is a small fix and there aren't a ton of e100 changes, I would prefer that you create a 'git pull' against 2.6.16-rc (latest Linus git tree), and I will pull that into upstream-fixes, and then on my side, pull upstream-fixes into upstream. Jeff, here is the diff against current linus-2.6.git The following changes since commit f13b83580acef03a36c785dccc534ccdd7e43084: are found in the git repository at: git://198.78.49.142/~jbrandeb/linux-2.6 e100-fixes Jesse Brandeburg: e100: fix eeh on pseries during ethtool -t drivers/net/e100.c |5 - 1 files changed, 4 insertions(+), 1 deletions(-) I hope I'm getting the hang of this now... It's the same patch but if you want me to reply with the patch again let me know. Thanks, Jesse - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2.6.16-rc6] e1000: update the readme with the latest text
From: Jesse Brandeburg [EMAIL PROTECTED] The text of the e1000.txt file is a little stale, lets freshen it up. (update) removed some non-kernel specific text Signed-off-by: Jesse Brandeburg [EMAIL PROTECTED] --- Documentation/networking/e1000.txt | 634 +++- 1 files changed, 402 insertions(+), 232 deletions(-) diff --git a/Documentation/networking/e1000.txt b/Documentation/networking/e1000.txt index 2ebd405..71fe15a 100644 --- a/Documentation/networking/e1000.txt +++ b/Documentation/networking/e1000.txt @@ -1,7 +1,7 @@ Linux* Base Driver for the Intel(R) PRO/1000 Family of Adapters === -November 17, 2004 +November 15, 2005 Contents @@ -20,254 +20,316 @@ In This Release === This file describes the Linux* Base Driver for the Intel(R) PRO/1000 Family -of Adapters, version 5.x.x. +of Adapters. This driver includes support for Itanium(R)2-based systems. -For questions related to hardware requirements, refer to the documentation -supplied with your Intel PRO/1000 adapter. All hardware requirements listed +For questions related to hardware requirements, refer to the documentation +supplied with your Intel PRO/1000 adapter. All hardware requirements listed apply to use with Linux. -Native VLANs are now available with supported kernels. +The following features are now available in supported kernels: + - Native VLANs + - Channel Bonding (teaming) + - SNMP + +Channel Bonding documentation can be found in the Linux kernel source: +/Documentation/networking/bonding.txt + +The driver information previously displayed in the /proc filesystem is not +supported in this release. Alternatively, you can use ethtool (version 1.6 +or later), lspci, and ifconfig to obtain the same information. + +Instructions on updating ethtool can be found in the section Additional +Configurations later in this document. 
+ Identifying Your Adapter -For more information on how to identify your adapter, go to the Adapter +For more information on how to identify your adapter, go to the Adapter Driver ID Guide at: http://support.intel.com/support/network/adapter/pro100/21397.htm -For the latest Intel network drivers for Linux, refer to the following -website. In the search field, enter your adapter name or type, or use the +For the latest Intel network drivers for Linux, refer to the following +website. In the search field, enter your adapter name or type, or use the networking link on the left to search for your adapter: http://downloadfinder.intel.com/scripts-df/support_intel.asp -Command Line Parameters -=== -If the driver is built as a module, the following optional parameters are -used by entering them on the command line with the modprobe or insmod command -using this syntax: +Command Line Parameters === + +If the driver is built as a module, the following optional parameters +are used by entering them on the command line with the modprobe or insmod +command using this syntax: modprobe e1000 [option=VAL1,VAL2,...] - insmod e1000 [option=VAL1,VAL2,...] + insmod e1000 [option=VAL1,VAL2,...] For example, with two PRO/1000 PCI adapters, entering: insmod e1000 TxDescriptors=80,128 -loads the e1000 driver with 80 TX descriptors for the first adapter and 128 TX -descriptors for the second adapter. +loads the e1000 driver with 80 TX descriptors for the first adapter and 128 +TX descriptors for the second adapter. The default value for each parameter is generally the recommended setting, -unless otherwise noted. Also, if the driver is statically built into the -kernel, the driver is loaded with the default values for all the parameters. -Ethtool can be used to change some of the parameters at runtime. - -NOTES: For more information about the AutoNeg, Duplex, and Speed - parameters, see the Speed and Duplex Configuration section in - this document. 
- - For more information about the InterruptThrottleRate, RxIntDelay, - TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay parameters, see the - application note at: - http://www.intel.com/design/network/applnots/ap450.htm +unless otherwise noted. + +NOTES: For more information about the AutoNeg, Duplex, and Speed +parameters, see the Speed and Duplex Configuration section in +this document. - A descriptor describes a data buffer and attributes related to the - data buffer. This information is accessed by the hardware. +For more information about the InterruptThrottleRate, +RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay +parameters, see the application note at: +http://www.intel.com/design/network/applnots/ap450.htm -AutoNeg (adapters using copper connections only) -Valid Range: 0x01-0x0F,
Re: [PATCH 2.6.16-rc6 0/3] MAINTAINERS, e100 and e1000 text file updates
On 3/14/06, Jesse Brandeburg [EMAIL PROTECTED] wrote: okay, here goes... these patches are against Linus's current tree. They only update text files, no code updates. The large change to e1000.txt includes whitespace changes, and some content. They could be included with 2.6.16 as they are for the drivers that are already merged. I realized that there were still some unnecessary remnants of our tarball-based readme so the git pull request now looks like this and has the changes in the following patch (under separate cover) The following changes since commit a488edc914aa1d766a4e2c982b5ae03d5657ec1b: are found in the git repository at: git://198.78.49.142/~jbrandeb/linux-2.6 e1000-fixes Jesse Brandeburg: e100/e1000/ixgb: update MAINTAINERS to current developers e100: update e100.txt e1000: update the readme with the latest text Documentation/networking/e100.txt | 158 ++--- Documentation/networking/e1000.txt | 634 +++- MAINTAINERS| 16 + 3 files changed, 509 insertions(+), 299 deletions(-) - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] ieee80211: Fix CCMP decryption problem when QoS is enabled
On Wed, Mar 08, 2006 at 10:49:19AM +0800, Zhu Yi wrote: From: Zhu Yi [EMAIL PROTECTED] Date: Tue, 28 Feb 2006 07:06:43 + (+0800) [PATCH] ieee80211: Fix CCMP decryption problem when QoS is enabled Use the correct STYPE for Qos data. Merged to the 'upstream-fixes' branch of wireless-2.6. Thanks! John -- John W. Linville [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Please pull 'upstream-fixes' branch of wireless-2.6
The following changes since commit f13b83580acef03a36c785dccc534ccdd7e43084: Adrian Bunk: fs/namespace.c:dup_namespace(): fix a use after free are found in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git upstream-fixes Hong Liu: ieee80211: Fix QoS is not active problem Zhu Yi: ieee80211: Fix CCMP decryption problem when QoS is enabled net/ieee80211/ieee80211_crypt_ccmp.c |2 +- net/ieee80211/ieee80211_rx.c |4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c index 4702217..3840d19 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/ieee80211/ieee80211_crypt_ccmp.c @@ -131,7 +131,7 @@ static void ccmp_init_blocks(struct cryp a4_included = ((fc (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)); qc_included = ((WLAN_FC_GET_TYPE(fc) == IEEE80211_FTYPE_DATA) - (WLAN_FC_GET_STYPE(fc) 0x08)); + (WLAN_FC_GET_STYPE(fc) IEEE80211_STYPE_QOS_DATA)); aad_len = 22; if (a4_included) aad_len += 6; diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index b410ab8..7ac6a71 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -1417,10 +1417,10 @@ static void ieee80211_process_probe_resp if (is_beacon(beacon-header.frame_ctl)) { if (ieee-handle_beacon != NULL) - ieee-handle_beacon(dev, beacon, network); + ieee-handle_beacon(dev, beacon, target); } else { if (ieee-handle_probe_response != NULL) - ieee-handle_probe_response(dev, beacon, network); + ieee-handle_probe_response(dev, beacon, target); } } -- John W. Linville [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] ieee80211: Fix QoS is not active problem
On Wed, Mar 08, 2006 at 10:50:20AM +0800, Zhu Yi wrote: From: Hong Liu [EMAIL PROTECTED] Date: Wed, 8 Mar 2006 02:28:01 + (+0800) [PATCH] ieee80211: Fix QoS is not active problem Fix QoS is not active even the network and the card is QOS enabled. The problem is we pass the wrong ieee80211_network address to ipw_handle_beacon/ipw_handle_probe_response, thus the ieee80211_network-qos_data.active will not be set, causing the driver not sending QoS frames at all. Merged to the 'upstream-fixes' branch of wireless-2.6. Thanks! John -- John W. Linville [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH]: e1000 endianness bugs
return -E_NO_BIG_ENDIAN_TESTING; [E1000]: Fix 4 missed endianness conversions on RX descriptor fields. Signed-off-by: David S. Miller [EMAIL PROTECTED] diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 5b7d0f4..1d91117 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -3710,7 +3710,7 @@ e1000_clean_rx_irq(struct e1000_adapter e1000_rx_checksum(adapter, (uint32_t)(status) | ((uint32_t)(rx_desc-errors) 24), - rx_desc-csum, skb); + le16_to_cpu(rx_desc-csum), skb); skb-protocol = eth_type_trans(skb, netdev); #ifdef CONFIG_E1000_NAPI @@ -3854,11 +3854,11 @@ e1000_clean_rx_irq_ps(struct e1000_adapt } e1000_rx_checksum(adapter, staterr, - rx_desc-wb.lower.hi_dword.csum_ip.csum, skb); + le16_to_cpu(rx_desc-wb.lower.hi_dword.csum_ip.csum), skb); skb-protocol = eth_type_trans(skb, netdev); if (likely(rx_desc-wb.upper.header_status - E1000_RXDPS_HDRSTAT_HDRSP)) + cpu_to_le16(E1000_RXDPS_HDRSTAT_HDRSP))) adapter-rx_hdr_split++; #ifdef CONFIG_E1000_NAPI if (unlikely(adapter-vlgrp (staterr E1000_RXD_STAT_VP))) { @@ -3884,7 +3884,7 @@ e1000_clean_rx_irq_ps(struct e1000_adapt #endif next_desc: - rx_desc-wb.middle.status_error = ~0xFF; + rx_desc-wb.middle.status_error = cpu_to_le32(~0xFF); buffer_info-skb = NULL; /* return some buffers to hardware, one at a time is too slow */ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH]: e1000 endianness bugs
On 3/15/06, David S. Miller [EMAIL PROTECTED] wrote: return -E_NO_BIG_ENDIAN_TESTING; [E1000]: Fix 4 missed endianness conversions on RX descriptor fields. Signed-off-by: David S. Miller [EMAIL PROTECTED] Yep, those look like bugs to me, thanks and congratulations, you're the first person to test our PCI Express adapters in a big endian system (they haven't been available before, and we don't have one, yet) Acked-by: Jesse Brandeburg [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
TSO still shuts-off on retrans?
Just wanted to ask if TSO still shuts-off on a connection at the first retransmission? thanks, rick jones - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: TSO still shuts-off on retrans?
From: Rick Jones [EMAIL PROTECTED] Date: Wed, 15 Mar 2006 15:32:11 -0800 Just wanted to ask if TSO still shuts-off on a connection at the first retransmission? No, it will not. All of the code paths that clear the NETIF_F_TSO bit in response to loss have been removed. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: TSO still shuts-off on retrans?
David S. Miller wrote: From: Rick Jones [EMAIL PROTECTED] Date: Wed, 15 Mar 2006 15:32:11 -0800 Just wanted to ask if TSO still shuts-off on a connection at the first retransmission? No, it will not. Excellent! All of the code paths that clear the NETIF_F_TSO bit in response to loss have been removed. When did that go-in? thanks, rick jones - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: TSO still shuts-off on retrans?
From: Rick Jones [EMAIL PROTECTED] Date: Wed, 15 Mar 2006 15:41:04 -0800 David S. Miller wrote: All of the code paths that clear the NETIF_F_TSO bit in response to loss have been removed. When did that go-in? 2.6.14 I think. But best to test with current sources because there have been a few congestion control bugs fixed in those TSO changes lately. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH]: e1000 endianness bugs
From: Jesse Brandeburg [EMAIL PROTECTED] Date: Wed, 15 Mar 2006 15:33:43 -0800 Yep, those look like bugs to me, thanks and congratulations, you're the first person to test our PCI Express adapters in a big endian system (they haven't been available before, and we don't have one, yet) It was onboard a Niagara T2000 system. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH 1/13] d80211: allow WDS remote to by set by WE
On Wed, Mar 15, 2006 at 07:24:05PM +0100, Jiri Benc wrote: On Wed, 15 Mar 2006 09:52:26 -0800, Jouni Malinen wrote: This sounds good, but I was unable to get this working. I created a WDS link with initial peer address 00:01:02:03:04:05. This added the netdev and STA entry correctly. However, when I run iwconfig wds0 ap 00:11:22:33:44:55, I do not see any change in either the WDS data (/proc/net/ieee80211/wlan0/iface/wds0 did not show change in wds.peer) or STA info (00:11:22:33:44:55 was not added in /proc/net/ieee80211/wlan0/sta directory). Interesting. It shouldn't be in any way different than using PRISM2_HOSTAPD_UPDATE_IF ioctl. I can't find any problem looking at the patch and I'm unable to try it now - I have to find out why my notebook freezes first :-/ Neither did I at first, but after some debugging, I would assume that you actually meant to return from ieee80211_ioctl_siwap() if the previous address is identical to the new one, not if it has changed.. In other words: Index: wireless-2.6/net/d80211/ieee80211_ioctl.c === --- wireless-2.6.orig/net/d80211/ieee80211_ioctl.c +++ wireless-2.6/net/d80211/ieee80211_ioctl.c @@ -1871,7 +1871,7 @@ static int ieee80211_ioctl_siwap(struct return ieee80211_sta_set_bssid(dev, (u8 *) ap_addr-sa_data); } else if (sdata-type == IEEE80211_SUB_IF_TYPE_WDS) { if (memcmp(sdata-u.wds.remote_addr, (u8 *) ap_addr-sa_data, - ETH_ALEN) != 0) + ETH_ALEN) == 0) return 0; return ieee80211_if_update_wds(dev, (u8 *) ap_addr-sa_data); } This was enough to fix wds.peer address and get rid of the old STA entry. However, new STA entry was not added for the new peer address. PS. I have now tested AP mode up to and including patch 5/13. This was still allowing multi-BSSID to be used successfully. Patch 6/13 breaks multi-BSSID configuration since our low-level driver needs bssid_mask and bss_count to be configured. 
I need to take a closer look at how this can be avoided with the new add_interface notification and automatic BSSID mask calculation at the low-level driver. Patch 7/13 breaks everything since we currently start with radio disabled by default.. I can change that to be the other way around to avoid this, but there are still some cases that need to be resolved (e.g., need to stop TX/RX when a radar is detected for the duration of scan for a new channel). I don't want to bring down all network interfaces for that, so some kind of mechanism for temporarily disabling TX/RX would be useful to have. -- Jouni MalinenPGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH 6/13] d80211: remove obsolete stuff
On Mon, Mar 06, 2006 at 04:44:26PM +0100, Jiri Benc wrote: Because any number of interfaces may be added, bss_devs and sta_devs arrays cannot be fixed-size arrays. We can make them linked lists, but they are needed for optimalization only (and even that is questionable with subsequent patches). Let's remove them; we will probably want something similar later to speed up packet receiving, but let's not bother ourselves now. @@ -277,9 +277,6 @@ struct ieee80211_conf { int antenna_def; int antenna_mode; - u8 bssid_mask[ETH_ALEN];/* ff:ff:ff:ff:ff:ff = 1 BSSID */ - int bss_count; In theory, the low-level driver can determine the needed mask itself. However, it would need to be somehow notified of allowed BSSID values. By removing this entry, this information would need to be fetched from somewhere else before interfaces are added. Most hardware implementations have storage for a single MAC address in EEPROM (or something similar) and in some cases, no addresses are stored with the card and some external store is needed for this. We have been using this mechanism of passing the information from user space to avoid problems in figuring out board-specific mechanisms for storing extra data. Do you have any ideas on what would be the best way of getting this information configured after this change? --- dscape.orig/net/d80211/ieee80211.c2006-03-06 14:10:18.0 +0100 +++ dscape/net/d80211/ieee80211.c 2006-03-06 14:10:22.0 +0100 @@ -1569,17 +1569,14 @@ struct sk_buff * ieee80211_beacon_get(st u8 *b_head, *b_tail; int bh_len, bt_len; - spin_lock_bh(local-sub_if_lock); - if (bss_idx 0 || bss_idx = local-bss_dev_count) - bdev = NULL; - else { - bdev = local-bss_devs[bss_idx]; + bdev = dev_get_by_index(bss_idx); This and the similar change for ieee80211_get_buffered_bc() add more requirements for the low-level driver. It used to be enough to just know that the low-level code should ask for up to N beacon frames. 
However, with this change, the low-level driver would need to maintain a list of ifindexes for the virtual interfaces. This is somewhat against the original design of hiding all the virtual interfaces from low-level code. I think the ifindex values could be made available from add/remove interface calls that you added. Was that what you had in mind or is there another mechanism for getting the needed ifindexes down? I need to understand this bit better in order to be able to modify the low-level driver to handle this kind of change. At the moment, this change does not look very good to me because of the extra requirement added for the low-level code as far as virtual interfaces are concerned. -- Jouni MalinenPGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [RFC/PATCH 6/13] d80211: remove obsolete stuff
The more natural way for beacons to flow from the 80211.o to the low level driver would be for beacons to be passed down just like any other 802.11 frame is passed down - rather than having a special case for beacons and buffered MC data, where they are pulled. I would suggest making the qdisc aware of beacons, and then there is no special interface for passing beacons down - they are passed down just like other frames, with a special queue ID reserved for beacons and buffered multicast. This would simplify the 80211.o/low level interface. Simon -Original Message- From: [EMAIL PROTECTED] [mailto:[EMAIL PROTECTED] On Behalf Of Jouni Malinen Sent: Wednesday, March 15, 2006 4:36 PM To: Jiri Benc Cc: netdev@vger.kernel.org Subject: Re: [RFC/PATCH 6/13] d80211: remove obsolete stuff On Mon, Mar 06, 2006 at 04:44:26PM +0100, Jiri Benc wrote: Because any number of interfaces may be added, bss_devs and sta_devs arrays cannot be fixed-size arrays. We can make them linked lists, but they are needed for optimalization only (and even that is questionable with subsequent patches). Let's remove them; we will probably want something similar later to speed up packet receiving, but let's not bother ourselves now. @@ -277,9 +277,6 @@ struct ieee80211_conf { int antenna_def; int antenna_mode; - u8 bssid_mask[ETH_ALEN];/* ff:ff:ff:ff:ff:ff = 1 BSSID */ - int bss_count; In theory, the low-level driver can determine the needed mask itself. However, it would need to be somehow notified of allowed BSSID values. By removing this entry, this information would need to fetched from somewhere else before interfaces are added. Most hardware implementations have storage for a single MAC address in EEPROM (or something similar) and in some cases, no addresses are stored with the card and some external store is needed for this. We have been using this mechanism of passing the information from user space to avoid problems in figuring out board specific mechanisms for storing extra data. 
Do you have any ideas on what would be the best of getting this information configured after this change? --- dscape.orig/net/d80211/ieee80211.c2006-03-06 14:10:18.0 +0100 +++ dscape/net/d80211/ieee80211.c 2006-03-06 14:10:22.0 +0100 @@ -1569,17 +1569,14 @@ struct sk_buff * ieee80211_beacon_get(st u8 *b_head, *b_tail; int bh_len, bt_len; - spin_lock_bh(local-sub_if_lock); - if (bss_idx 0 || bss_idx = local-bss_dev_count) - bdev = NULL; - else { - bdev = local-bss_devs[bss_idx]; + bdev = dev_get_by_index(bss_idx); This and similar change for ieee80211_get_buffered_bc() add more requirements for the low-level driver. It used to be enough to just know that the low-level code should ask for up to N beacon frames. However, with this change, the low-level driver would need to maintain a list of ifindexes for the virtual interfaces. This is somewhat against the original design of hiding all the virtual interfaces from low-level code. I think the ifindex values could be made available from add/remove interface calls that you added. Was that what you had in mind or is there another mechanism for getting the needed ifindexes down? I need to understand this bit better in order to be able to modify the low-level driver to handle this kind of change. At the moment, this change does not look very good to me because of the extra requirement added for the low-level code as far as virtual interfaces are concerned. -- Jouni MalinenPGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC/PATCH 6/13] d80211: remove obsolete stuff
On Wed, Mar 15, 2006 at 04:41:56PM -0800, Simon Barber wrote: The more natural way for beacons to flow from the 80211.o to the low level driver would be for beacons to be passed down just like any other 802.11 frame is passed down - rather than having a special case for beacons and buffered MC data, where they are pulled. I would suggest making the qdisc aware of beacons, and then there is no special interface for passing beacons down - they are passed down just like other frames, with a special queue ID reserved for beacons and buffered multicast. This would simplify the 80211.o/low level interface. Sure, but it would also require good synchronization for sending the beacons just before they are needed for transmission.. If the wlan hardware implementation provides support for interrupts that request beacons at proper times, being able to use them for this is quite convenient. -- Jouni MalinenPGP id EFC895FA - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] TC: bug fixes to the sample clause
On Wed, 2006-03-15 at 10:21 -0500, jamal wrote: It could - if it can proven to maintain backward compatibility. I think backward compatibility would probably be fine to be defined as it works with one byte. But you are right. It is a pain. One suggestion Stephen Hemminger had was to transport the hash on a TLV; I am not a big fan of dynamic coding. If you reject utsname, then this is the only way to maintain backward compatibility. Well, the way we strive to have code working in backwards compatible format with any netlink derived code such as is the case with iproute2 is to introduce new TLVs. This way old kernels ignore new ones and new ones can opt to ignore old TLVs. You will never ever see code which says if this is 2.4.x then x - it is considered close to blasphemy; more importantly it adds maintainance overhead - imagine if someone changed the hash in the kernel. Hehe. Thanks for the explanation - it is appreciated. I am a newbie at this kernel coding thing. Assuming you are not swayed by the arguments above, I think I have lost this one. I presume you have no objections to changing tc to use the 2.6 hash and leave it at that. Yes? indeed. OK, so the next email from me will be the patch. I would like to get this over with, so I can move on to the next set of patches I have for tc. I dont agree with reverting the 2.6.x change. In the future if you do prove that the old one is better or a newer one is better then we will revisit given the definition we have of backwards compatibility above. Note, you need a lot more data than the simple example you showed. We can discuss this later etc. I am interested. Yes - later. I have posted data where the 2.4 algorithm fares better. To proceed with the discussion we need your data that shows it is worse. Then we can haggle about which is more likely in real life. 
- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] TC: bug fixes to the sample clause
On Thu, 16 Mar 2006 10:52:03 +1000 Russell Stuart [EMAIL PROTECTED] wrote: On Wed, 2006-03-15 at 10:21 -0500, jamal wrote: It could - if it can proven to maintain backward compatibility. I think backward compatibility would probably be fine to be defined as it works with one byte. But you are right. It is a pain. One suggestion Stephen Hemminger had was to transport the hash on a TLV; I am not a big fan of dynamic coding. If you reject utsname, then this is the only way to maintain backward compatibility. Also using utsname won't work if you want to do remote netlink (which jamal talks about but don't know if it ever gets used). - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] TC: bug fixes to the sample clause
On Thu, 2006-16-03 at 10:52 +1000, Russell Stuart wrote: On Wed, 2006-03-15 at 10:21 -0500, jamal wrote: I dont agree with reverting the 2.6.x change. In the future if you do prove that the old one is better or a newer one is better then we will revisit given the definition we have of backwards compatibility above. Note, you need a lot more data than the simple example you showed. We can discuss this later etc. I am interested. Yes - later. I have posted data where the 2.4 algorithm fares better. To proceed with the discussion we need your data that shows it is worse. Then we can haggle about which is more likely in real life. Sounds good. I am assuming you are on your way to sending the patch, so let me note again the variables i mentioned when you do run those comparison. Your test was too simplistic to merit a fair comparison. The variables again are: a) the mask/mask size b) the size of the buckets available example if you are masking on 2 bits, then it doesnt matter if you have 256 buckets - only 4 get used. So creating more than 4 is a waste of memory. c) The offset within the 32 bit. I dont think this is a big factor if you keep shifting by increments of a byte. The last one is the range of values in your test data. Example if you have only 8 bits that you are masking on, then you dont need an input dataset of more than 256 - it adds no value. IIRC, the old 2.4.x hash is not only inefficient over a wide set of values it will be considered plain _buggy_ for lower values. I think over the weekend i will try to write a little program myself to simulate the different cases myself. cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] TC: bug fixes to the sample clause
<snip>Much discussion bashing this issue to death.</snip> (sorry, jamal - this one is CC'ed to lartc.) Here are revised versions of the 2 as-yet-unapplied patches. PATCH 1 === [Has been applied.] PATCH 2 === In tc, the u32 sample clause uses the 2.4 hashing algorithm. The hashing algorithm used by the kernel changed in 2.6, consequently sample hasn't worked since then. This patch makes the sample clause work with 2.6 only. This is different from the prior version of the patch, which made it work with both 2.4 and 2.6: diff -Nur iproute-20051007.keep/tc/f_u32.c iproute-20051007/tc/f_u32.c --- iproute-20051007.keep/tc/f_u32.c2006-03-14 12:28:17.0 +1000 +++ iproute-20051007/tc/f_u32.c 2006-03-16 11:08:25.0 +1000 @@ -888,8 +888,18 @@ return -1; } hash = sel2.sel.keys[0].valsel2.sel.keys[0].mask; +#if0 + /* 2.2 .. 2.4 hashing algorithm */ hash ^= hash16; hash ^= hash8; +#else + /* 2.5 onwards */ + __u32 mask = sel2.sel.keys[0].mask; + while (mask !(mask 1)) { + mask = 1; + hash = 1; + } +#endif htid = ((hash12)0xFF000)|(htid0xFFF0); sample_ok = 1; continue; PATCH 3 === tc does not allow you to specify the divisor for the sample clause; it always assumes a divisor of 256. If the divisor isn't 256 (i.e. it is something less), the kernel will usually whinge because the bucket given to it by tc is typically too big. This patch adds a divisor option to tc's sample clause. This is identical to the previous version of the patch, other than it now applies correctly after the revised PATCH 2. 
diff -Nur iproute-20051007.keep/tc/f_u32.c iproute-20051007/tc/f_u32.c --- iproute-20051007.keep/tc/f_u32.c2006-03-16 11:27:17.0 +1000 +++ iproute-20051007/tc/f_u32.c 2006-03-16 11:29:56.0 +1000 @@ -34,7 +34,7 @@ fprintf(stderr, or u32 divisor DIVISOR\n); fprintf(stderr, \n); fprintf(stderr, Where: SELECTOR := SAMPLE SAMPLE ...\n); - fprintf(stderr,SAMPLE := { ip | ip6 | udp | tcp | icmp | u{32|16|8} | mark } SAMPLE_ARGS\n); + fprintf(stderr,SAMPLE := { ip | ip6 | udp | tcp | icmp | u{32|16|8} | mark } SAMPLE_ARGS [divisor DIVISOR]\n); fprintf(stderr,FILTERID := X:Y:Z\n); } @@ -834,7 +834,7 @@ unsigned divisor; NEXT_ARG(); if (get_unsigned(divisor, *argv, 0) || divisor == 0 || - divisor 0x100) { + divisor 0x100 || (divisor - 1 divisor)) { fprintf(stderr, Illegal \divisor\\n); return -1; } @@ -874,6 +874,7 @@ htid = (handle0xF000); } else if (strcmp(*argv, sample) == 0) { __u32 hash; + unsigned divisor = 0x100; struct { struct tc_u32_sel sel; struct tc_u32_key keys[4]; @@ -888,6 +889,15 @@ fprintf(stderr, \sample\ must contain exactly ONE key.\n); return -1; } + if (*argv != 0 strcmp(*argv, divisor) == 0) { + NEXT_ARG(); + if (get_unsigned(divisor, *argv, 0) || divisor == 0 || + divisor 0x100 || (divisor - 1 divisor)) { + fprintf(stderr, Illegal sample \divisor\\n); + return -1; + } + NEXT_ARG(); + } hash = sel2.sel.keys[0].valsel2.sel.keys[0].mask; #if0 /* 2.2 .. 2.4 hashing algorithm */ @@ -901,7 +911,7 @@ hash = 1; } #endif - htid = ((hash12)0xFF000)|(htid0xFFF0); + htid = ((hash%divisor)12)|(htid0xFFF0); sample_ok = 1; continue; } else if (strcmp(*argv, indev) == 0) { - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] TC: bug fixes to the sample clause
taken out lartc. On Thu, 2006-16-03 at 11:43 +1000, Russell Stuart wrote: [..] PATCH 2 === In tc, the u32 sample clause uses the 2.4 hashing algorithm. The hashing algorithm used by the kernel changed in 2.6, consequently sample hasn't work since then. This patch makes the sample clause work 2.6 only. This is different from the prior version of the patch, in that it made it work with 2.4 and 2.6: I don't think Stephen would like that #if 0; however, this is not why I am speaking up;- Your statement above that sample has never worked since may be misleading (just in case the text goes to the release logs). If you used sample the way it is being used by mere mortals (sample on the protocol), the hashing by either one of the two would end up in the same bucket. cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] TC: bug fixes to the sample clause
On Wed, 2006-03-15 at 20:28 -0500, jamal wrote: The variables again are: a) the mask/mask size b) the size of the buckets available example if you are masking on 2 bits, then it doesnt matter if you have 256 buckets - only 4 get used. So creating more than 4 is a waste of memory. c) The offset within the 32 bit. I dont think this is a big factor if you keep shifting by increments of a byte. The last one is the range of values in your test data. Example if you have only 8 bits that you are masking on, then you dont need an input dataset of more than 256 - it adds no value. IIRC, the old 2.4.x hash is not only inefficient over a wide set of values it will be considered plain _buggy_ for lower values. I think over the weekend i will try to write a little program myself to simulate the different cases myself. Hmmm, you triggered an idea. You seem to be interested in one byte values. I think you will have trouble proving that 2.4 isn't better for multi byte values. Which leaves the issue of one byte values. Up till now I could not see how 2.4 could be worse than 2.6 in that case because the 2.4 and 2.6 are identical for 1 byte aligned values. OK, so assume they are not aligned. 2.6 effectively does a N bit shift, and 2.4 does not. So assume you have an 8 bit value like this sitting in a 16 bit word: +--+ | . . . . B0 B1 B2 B3 | B4 B5 B6 B7 . . . . | +--+ The rest of the bits are masked off. 2.6 will shift to get at the bits by shifting, so will the hashed value will be: +-+ | B0 B1 B2 B3 B4 B5 B6 B7 | +-+ In we compute the hash by XOR'ing the two bytes, so the result will be: +-+ | B4 B5 B6 B7 B0 B1 B2 B3 | +-+ In this case 2.4 and 2.6 compute different values, but the effectiveness of the hash will be the same. At this point I gave up: I didn't see how 2.6 could every be better than 2.4. But then I am more interested in multi byte values. But I see now you are interested in patterns of less than 8 bits. 
I bet it is the TOS bits in the IP header - you are using hashkey u8 0x1C at 1. At first sight this is a simple 1 byte hash - but everything changes if you use a divisor of 8 - which I presume you are. In that case the finished hash for 2.6 will be spread evenly across the 8 buckets. The 2.4 hash will use only 2 buckets with 4 in each. This arises because 2.6 shifts and 2.4 XOR's. The 2.4 problem disappears if you use a divisor of 32 for this case, meaning that if you use a divisor of 32 then 2.4 runs as fast as 2.6. The overhead for doing this is an extra 96 bytes (= (32-8)*4, each hash table has an overhead of a pointer - 4 bytes) in the kernel. So the trade offs appear to be: For < 1 byte values: 2.4 may require more memory than 2.6 to run at the same speed. For 1 byte values: The hashes are identical. For > 1 byte values: 2.6 will usually run slower than 2.4. Given today's technology, I think memory for speed was the right trade off. But, there is room for compromise. We could have the best of both worlds, by combining the 2.4 and 2.6 hashes. Ie, something like this: hash = (value & MASK) >> shift; // Identical to 2.6 hash = (hash >> 16) | hash; // Identical to 2.4 hash = (hash >> 8) | hash; // Identical to 2.4 return hash & 0xFF; // Identical to both. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] TC: bug fixes to the sample clause
On Wed, 2006-03-15 at 20:56 -0500, jamal wrote: I dont think Stephen would like that #if 0; however, this is not why i am speaking up;-> I put it in there so it would be easy for someone using 2.4 to revert the patch, if they felt so inclined. Stephen, let me know if you want it removed. Your statement above that sample has never worked since may be misleading (just in case the text goes to the release logs). If you used sample the way it is being used by mere mortals (sample on the protocol), the hashing by either one of the two would end up in the same bucket. True - depending on how you define mortal. It wouldn't work if you were hashing on the TOS bits, for example. There may be more than one person in the world doing that - although there isn't a huge amount of evidence for it. Anyway, I imagine that if Stephen doesn't like the comment, he will choose a different one for the CVS log. It is not like it is part of the source. However, before anyone commits it, I would like to hear your comments on my compromise hashing algorithm. It would be a shame to have to patch it twice. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] TC: bug fixes to the sample clause
On Thu, 2006-16-03 at 12:37 +1000, Russell Stuart wrote: On Wed, 2006-03-15 at 20:28 -0500, jamal wrote: The variables again are: a) the mask/mask size b) the size of the buckets available example if you are masking on 2 bits, then it doesnt matter if you have 256 buckets - only 4 get used. So creating more than 4 is a waste of memory. c) The offset within the 32 bit. I dont think this is a big factor if you keep shifting by increments of a byte. The last one is the range of values in your test data. Example if you have only 8 bits that you are masking on, then you dont need an input dataset of more than 256 - it adds no value. IIRC, the old 2.4.x hash is not only inefficient over a wide set of values it will be considered plain _buggy_ for lower values. I think over the weekend i will try to write a little program myself to simulate the different cases myself. Hmmm, you triggered an idea. You seem to be interested in one byte values. I think you will have trouble proving that 2.4 isn't better for multi byte values. Try this: Suppose you pick octet 3 of the dst IP address and we assume the full range of that octet i.e a range of values 0-255 (the details are a lot more complicated of where this byte belongs, example this could be part of an IPV6 address and we take all the bits and spread them horizontally in many hash tables): Assume for a specific hash table you are interested in only 6 bits so use a mask of 0xfc. Now clearly it doesnt make sense to have anything more than 64 buckets. So restrict your buckets to 64. Run the two algorithms. Derive using your equation how things look like. I have a feeling that you may need to plot to see this. Next try to make the hasn buckets 64 say 128, 256. Next try to vary the offset of this byte and therefore the mask within a 32 bit;- And when you are done and things look odd then run the full test results by varying the variables. 
I am certain that the values you picked for hash buckets etc make the old hash look favorable; but that will be one of the _very_ few cases where it would look good. This is not hard - we need some scientific data;- [Deleting the rest of your email because it is related to the above]. cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] TC: bug fixes to the sample clause
On Thu, 2006-16-03 at 12:45 +1000, Russell Stuart wrote: However, before anyone commits it, I would like to hear your comments on my compromise hashing algorithm. I would be a shame to have to patch it twice. Refer to my other email - I think a lot more work is needed before reaching conclusions. Patching in the future should not be an issue if you do the right thing. cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
2.6.15 localhost performance hit
Hello, I've taken a performance hit over localhost between kernels 2.6.14 and 2.6.15 in my client/server application. I'm trying to cut things down to a simple test case; in the meantime, this is what I've been discussing with the people at the fedora test list : This is only over localhost (lo). Two machines running client/server 2.6.15 over ether seem fine, as was 2.6.14. 2.6.14 : about one or two recv() calls out of 48,000 take nearly 40 ms. (no big deal--might add 80 ms. to a 20 second operation). 2.6.15 : about 3,000 recv() calls out of 48,000 take nearly 40 ms. (adds almost two minutes) From strace : 15:27:04.568800 recv(3, "<?xml version=\"1.0\" encoding=\"UT"..., 555, 0) = 555 <0.000121> vs. 15:18:24.515891 recv(3, "<?xml version=\"1.0\" encoding=\"UT"..., 566, 0) = 566 <0.038414> Will watch replies and post more when I know more. Kinda new at this. -- SW - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 2.6.15 localhost performance hit
On Wed, 15 Mar 2006 20:13:01 -0800 Skunk Worx [EMAIL PROTECTED] wrote: Hello, I've taken a performance hit over localhost between kernels 2.6.14 and 2.6.15 in my client/server application. I'm trying to gut things down to a simple test case, in the meantime, this is what I've been discussing with the people at the fedora test list : This is only over localhost (lo). Two machines running client/server 2.6.15 over ether seem fine, as was 2.6.14. 2.6.14 : about one or two recv() calls out of 48,000 take nearly 40 ms. (no big deal--might add 80 ms. to a 20 second operation). 2.6.15 : about 3,000 recv() calls out of 48,000 take nearly 40 ms. (adds almost two minutes) From strace : 15:27:04.568800 recv(3, ?xml version=\1.0\ encoding=\UT..., 555, ) = 555 0.000121 vs. 15:18:24.515891 recv(3, ?xml version=\1.0\ encoding=\UT..., 566, ) = 566 0.038414 Will watch replies and post more when I know more. Kinda new at this. You need to not do small writes over the loopback interface. If you do big writes, then performance will be much better in all cases. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 2.6.15 localhost performance hit
On Wed, 15 Mar 2006 20:13:01 -0800 Skunk Worx [EMAIL PROTECTED] wrote: Hello, I've taken a performance hit over localhost between kernels 2.6.14 and 2.6.15 in my client/server application. I'm trying to cut things down to a simple test case; in the meantime, this is what I've been discussing with the people at the fedora test list : This is only over localhost (lo). Two machines running client/server 2.6.15 over ether seem fine, as was 2.6.14. 2.6.14 : about one or two recv() calls out of 48,000 take nearly 40 ms. (no big deal--might add 80 ms. to a 20 second operation). 2.6.15 : about 3,000 recv() calls out of 48,000 take nearly 40 ms. (adds almost two minutes) From strace : 15:27:04.568800 recv(3, "<?xml version=\"1.0\" encoding=\"UT"..., 555, 0) = 555 <0.000121> vs. 15:18:24.515891 recv(3, "<?xml version=\"1.0\" encoding=\"UT"..., 566, 0) = 566 <0.038414> Will watch replies and post more when I know more. Kinda new at this. This came up with Java debugging already. The problem is that when the sender writes a message in separate write() system calls, each one becomes a separate packet. In 2.6.15 we do a new thing called Appropriate Byte Count and that penalizes stupid applications, but provides better fairness over the internet by accounting for packets better. If the application does: write(socket, "<?xml version =\"", ...); write(socket, "1.0", ...); write(socket, "\" encoding = ", ...); then finally read(socket, response); then after the second write system call, the next write will wait until a TCP ack comes back. If the application was smart and used writev() to do scatter gather, or buffering if using stdio, then the data goes out in nice big chunks and there will be no problem. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] TC: bug fixes to the sample clause
On Wed, 2006-03-15 at 22:07 -0500, jamal wrote: Suppose you pick octet 3 of the dst IP address and we assume the full range of that octet i.e a range of values 0-255 (the details are a lot more complicated of where this byte belongs, example this could be part of an IPV6 address and we take all the bits and spread them horizontally in many hash tables): Assume for a specific hash table you are interested in only 6 bits so use a mask of 0xfc. Now clearly it doesnt make sense to have anything more than 64 buckets. So restrict your buckets to 64. Run the two algorithms. Derive using your equation how things look like. I have a feeling that you may need to plot to see this. Next try to make the hasn buckets 64 say 128, 256. Next try to vary the offset of this byte and therefore the mask within a 32 bit;- And when you are done and things look odd then run the full test results by varying the variables. I am certain that the values you picked for hash buckets etc make the old hash look favorable; but that will be one of the _very_ few cases where it would look good. This is not hard - we need some scientific data;- There is no need to run a test. Evidently, I didn't make myself clear in the previous email. Assume the value you are hashing on is: +-+ | B0 B1 B2 B3 B4 B5 B6 B7 | +-+ After masking it will be: +-+ | B0 B1 B2 B3 B4 B5 0 0 | +-+ After hashing it will be: 2.4: 2.6: +-+ +-+ | B0 B1 B2 B3 B4 B5 0 0 | | 0 0 B0 B1 B2 B3 B4 B5 | +-+ +-+ If the divisor is 256 both hashes will perform identically. If the divisor is 64, we save 768 bytes in the kernel. But now the hashes change to: 2.4: 2.6: +-+ +-+ | 0 0 B2 B3 B4 B5 0 0 | | 0 0 B0 B1 B2 B3 B4 B5 | +-+ +-+ With a divisor of 64 bytes, the 2.6 hash produces 6 bit quantity which enumerates to 64 unique values, and the 2.4 hash produces 4 bits which enumerates to 16 unique values. Ergo, each 2.4 bucket will hold 4 values (=64 / 16), whereas the 2.6 buckets will hold one each (=64 / 64). 
Thus in this case, we can say that either: 2.4 and 2.6 use the same amount of memory, but 2.4 runs slower. 2.4 and 2.6 run at the same speed, but 2.4 uses more memory. Take your pick. BTW, in this example, the new hash I suggested would be as good as the 2.6 case. Now lets take the case that we are hashing a number of bytes with a 256 divisor (my case). If these bytes contain truly random values, then again 2.4 and 2.6 will be the same. This is because the hash value 2.6 uses, the low order byte, is already perfectly random - so it can't be improved on. The 2.4 XOR's the two values together. XOR has the property that it adds the randomness of the bits together, unless they are correlated. So if you take two partially random bits, and XOR them together, then the resulting bit will be more random than the original two bits. An illustration of this from crypto is a stream cypher like rc4. rc4 effectively produces a random stream of bits. To use rc4, you XOR your plain text with this random stream. Even though your plain text is highly non-random, the cypher text is at least as random as the rc4 stream - and thus looks like gibberish. Anyway, the end result for 2.4 is that if you XOR two perfectly random bytes, the result is a perfectly random byte. So for random data 2.6 and 2.4 are the same. Now lets take non-random data - say a bunch of IPv6 addresses. Particularly in the mac address part, you know some bits are going to be highly non-random. Here 2.4 is going to do much better than 2.6. To illustrate, lets take the trivial case where there is only 1 random bit somewhere. The odds of that random bit being in the low order byte, which is what becomes 2.6's hash, is 1 in 4 (there being a 1 in 4 chance that bit will end up in the lower order byte - there being 4 bytes it could end up in with equal probability). So there is a 1 in 4 chance of 2.6 producing a good hash.
In the case of 2.4, because the bytes are XOR'ed, the odds are 1 in 1 - ie it is certain 2.4 will produce the best hash possible. If there are 2 random bits, then the best hash will have 2 bits of randomness. The odds of 2.6 ending up with this hash are 1 in 17.71. (This is (8/32)*(7/31) - ie there are 8 ways in 32 the first bit can end up in the low order byte, and 7 ways in 31 the next bit can end up in the low order byte, and we need both these events to happen so we multiply.) The odds of 2.4 ending up with it is 10.33. (Once the first bit is allocated, there are 3 ways in 31 the second bit would overlap it in an XOR.) The odds of 2.6 picking the next best
Re: [PATCH]: e1000 endianness bugs
On Wed, Mar 15, 2006 at 02:26:28PM -0800, David S. Miller wrote: return -E_NO_BIG_ENDIAN_TESTING; [E1000]: Fix 4 missed endianness conversions on RX descriptor fields. Could the e1000 maintainers please add endianess annotations so that sparse will catch such things in the future? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html