[PATCH] Net: Remove FASTCALL macro
X86_32 was the last user of the FASTCALL macro, now that it uses regparm(3) by default, this macro expands to nothing. Signed-off-by: Harvey Harrison [EMAIL PROTECTED] --- Should this be coordinated with the FASTCALL() removal in the x86 git tree? drivers/net/ns83820.c |5 + include/net/bluetooth/rfcomm.h |4 ++-- include/net/sock.h |4 ++-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index ea71f6d..972acc3 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -611,7 +611,7 @@ static inline int rx_refill(struct net_device *ndev, gfp_t gfp) return i ? 0 : -ENOMEM; } -static void FASTCALL(rx_refill_atomic(struct net_device *ndev)); +static void rx_refill_atomic(struct net_device *ndev)); static void fastcall rx_refill_atomic(struct net_device *ndev) { rx_refill(ndev, GFP_ATOMIC); @@ -633,7 +633,6 @@ static inline void clear_rx_desc(struct ns83820 *dev, unsigned i) build_rx_desc(dev, dev-rx_info.descs + (DESC_SIZE * i), 0, 0, CMDSTS_OWN, 0); } -static void FASTCALL(phy_intr(struct net_device *ndev)); static void fastcall phy_intr(struct net_device *ndev) { struct ns83820 *dev = PRIV(ndev); @@ -832,7 +831,6 @@ static void ns83820_cleanup_rx(struct ns83820 *dev) } } -static void FASTCALL(ns83820_rx_kick(struct net_device *ndev)); static void fastcall ns83820_rx_kick(struct net_device *ndev) { struct ns83820 *dev = PRIV(ndev); @@ -854,7 +852,6 @@ static void fastcall ns83820_rx_kick(struct net_device *ndev) /* rx_irq * */ -static void FASTCALL(rx_irq(struct net_device *ndev)); static void fastcall rx_irq(struct net_device *ndev) { struct ns83820 *dev = PRIV(ndev); diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 25aa575..98ec7a3 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -252,8 +252,8 @@ static inline void rfcomm_dlc_put(struct rfcomm_dlc *d) rfcomm_dlc_free(d); } -extern void FASTCALL(__rfcomm_dlc_throttle(struct rfcomm_dlc 
*d)); -extern void FASTCALL(__rfcomm_dlc_unthrottle(struct rfcomm_dlc *d)); +extern void __rfcomm_dlc_throttle(struct rfcomm_dlc *d); +extern void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d); static inline void rfcomm_dlc_throttle(struct rfcomm_dlc *d) { diff --git a/include/net/sock.h b/include/net/sock.h index 67e35c7..bdad9ba 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -812,14 +812,14 @@ do { \ lockdep_init_map((sk)-sk_lock.dep_map, (name), (key), 0); \ } while (0) -extern void FASTCALL(lock_sock_nested(struct sock *sk, int subclass)); +extern void lock_sock_nested(struct sock *sk, int subclass); static inline void lock_sock(struct sock *sk) { lock_sock_nested(sk, 0); } -extern void FASTCALL(release_sock(struct sock *sk)); +extern void release_sock(struct sock *sk); /* BH context may only use the following locking interface. */ #define bh_lock_sock(__sk) spin_lock(((__sk)-sk_lock.slock)) -- 1.5.3.7.2212.gd092 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/4] [NETDEV] ixp2000: rtnl_lock out of loop will be faster
[PATCH 3/4] [NETDEV] ixp2000: rtnl_lock out of loop will be faster Before this patch, it gets and releases the lock at each iteration of the loop. Changing unregister_netdev to unregister_netdevice and locking outside of the loop will be faster for this approach. Signed-off-by: Wang Chen [EMAIL PROTECTED] --- ixpdev.c | 12 +--- 1 files changed, 9 insertions(+), 3 deletions(-) diff -Nurp linux-2.6.24.rc5.org/drivers/net/ixp2000/ixpdev.c linux-2.6.24.rc5/drivers/net/ixp2000/ixpdev.c --- linux-2.6.24.rc5.org/drivers/net/ixp2000/ixpdev.c 2007-12-12 10:19:41.0 +0800 +++ linux-2.6.24.rc5/drivers/net/ixp2000/ixpdev.c 2007-12-12 16:25:44.0 +0800 @@ -15,6 +15,7 @@ #include linux/etherdevice.h #include linux/init.h #include linux/moduleparam.h +#include linux/rtnetlink.h #include asm/hardware/uengine.h #include asm/mach-types.h #include asm/io.h @@ -375,14 +376,16 @@ int ixpdev_init(int __nds_count, struct ixp2000_uengine_load(1, ixp2400_tx); ixp2000_uengine_start_contexts(1, 0xff); + rtnl_lock(); for (i = 0; i nds_count; i++) { - err = register_netdev(nds[i]); + err = register_netdevice(nds[i]); if (err) { while (--i = 0) - unregister_netdev(nds[i]); + unregister_netdevice(nds[i]); goto err_free_tx; } } + rtnl_unlock(); for (i = 0; i nds_count; i++) { printk(KERN_INFO %s: IXP2000 MSF ethernet (port %d), @@ -395,6 +398,7 @@ int ixpdev_init(int __nds_count, struct return 0; err_free_tx: + rtnl_unlock(); for (i = 0; i TX_BUF_COUNT; i++) free_page((unsigned long)phys_to_virt(tx_desc[i].buf_addr)); @@ -412,8 +416,10 @@ void ixpdev_deinit(void) /* @@@ Flush out pending packets. */ + rtnl_lock(); for (i = 0; i nds_count; i++) - unregister_netdev(nds[i]); + unregister_netdevice(nds[i]); + rtnl_unlock(); ixp2000_uengine_stop_contexts(1, 0xff); ixp2000_uengine_stop_contexts(0, 0xff); -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/4] [NETDEV] cxgb3: rtnl_lock out of loop will be faster
[PATCH 2/4] [NETDEV] cxgb3: rtnl_lock out of loop will be faster Before this patch, it gets and releases the lock at each iteration of the loop. Changing unregister_netdev to unregister_netdevice and locking outside of the loop will be faster for this approach. Signed-off-by: Wang Chen [EMAIL PROTECTED] --- cxgb3_main.c |4 +++- 1 files changed, 3 insertions(+), 1 deletion(-) --- linux-2.6.24.rc5.org/drivers/net/cxgb3/cxgb3_main.c 2007-12-12 10:19:39.0 +0800 +++ linux-2.6.24.rc5/drivers/net/cxgb3/cxgb3_main.c 2007-12-12 15:19:15.0 +0800 @@ -2584,9 +2584,11 @@ static void __devexit remove_one(struct sysfs_remove_group(adapter-port[0]-dev.kobj, cxgb3_attr_group); + rtnl_lock(); for_each_port(adapter, i) if (test_bit(i, adapter-registered_device_map)) - unregister_netdev(adapter-port[i]); + unregister_netdevice(adapter-port[i]); + rtnl_unlock(); if (is_offload(adapter)) { cxgb3_adapter_unofld(adapter); -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/4] [NETDEV] sky2: rtnl_lock out of loop will be faster
[PATCH 4/4] [NETDEV] sky2: rtnl_lock out of loop will be faster Before this patch, it gets and releases the lock at each iteration of the loop. Changing unregister_netdev to unregister_netdevice and locking outside of the loop will be faster for this approach. Signed-off-by: Wang Chen [EMAIL PROTECTED] --- sky2.c |4 +++- 1 files changed, 3 insertions(+), 1 deletion(-) --- linux-2.6.24.rc5.org/drivers/net/sky2.c 2007-12-12 10:19:43.0 +0800 +++ linux-2.6.24.rc5/drivers/net/sky2.c 2007-12-12 15:23:37.0 +0800 @@ -4270,8 +4270,10 @@ static void __devexit sky2_remove(struct del_timer_sync(hw-watchdog_timer); cancel_work_sync(hw-restart_work); + rtnl_lock(); for (i = hw-ports-1; i = 0; --i) - unregister_netdev(hw-dev[i]); + unregister_netdevice(hw-dev[i]); + rtnl_unlock(); sky2_write32(hw, B0_IMSK, 0); -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 7/7] [NETDEV]: myri10ge Fix possible causing oops of net_rx_action
Would you like to submit the Driver Test Cases to LTP ? Or, if you have your Driver test cases written in C, we can port to LTP and have it inside the LTP package. You can also check out the following links for more info, and can always ping me for anything: http://ltp.cvs.sourceforge.net/ltp/ltp/testcases/kernel/device-drivers/, http://ltp.sourceforge.net/documentation/how-to/ltp.php? Regards-- Subrata (LTP Maintainer) On Wed, 2007-12-12 at 13:01 +0900, Joonwoo Park wrote: [NETDEV]: myri10ge Fix possible causing oops of net_rx_action Signed-off-by: Joonwoo Park [EMAIL PROTECTED] --- drivers/net/myri10ge/myri10ge.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 8def865..57311ed 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -1242,6 +1242,8 @@ static int myri10ge_poll(struct napi_struct *napi, int budget) if (work_done budget || !netif_running(netdev)) { netif_rx_complete(netdev, napi); put_be32(htonl(3), mgp-irq_claim); + if (unlikely(work_done == napi-weight)) + return work_done - 1; } return work_done; } --- -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/4] [NETDEV] cxgb2: rtnl_lock out of loop will be faster
[PATCH 1/4] [NETDEV] cxgb2: rtnl_lock out of loop will be faster Before this patch, it gets and releases the lock at each iteration of the loop. Changing unregister_netdev to unregister_netdevice and locking outside of the loop will be faster for this approach. Signed-off-by: Wang Chen [EMAIL PROTECTED] --- cxgb2.c |5 - 1 files changed, 4 insertions(+), 1 deletion(-) --- linux-2.6.24.rc5.org/drivers/net/chelsio/cxgb2.c2007-12-12 10:19:39.0 +0800 +++ linux-2.6.24.rc5/drivers/net/chelsio/cxgb2.c2007-12-12 16:24:14.0 +0800 @@ -46,6 +46,7 @@ #include linux/mii.h #include linux/sockios.h #include linux/dma-mapping.h +#include linux/rtnetlink.h #include asm/uaccess.h #include cpl5_cmd.h @@ -1387,10 +1388,12 @@ static void __devexit remove_one(struct struct adapter *adapter = dev-priv; int i; + rtnl_lock(); for_each_port(adapter, i) { if (test_bit(i, adapter-registered_device_map)) - unregister_netdev(adapter-port[i].dev); + unregister_netdevice(adapter-port[i].dev); } + rtnl_unlock(); t1_free_sw_modules(adapter); iounmap(adapter-regs); -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [4/4] DST: Algorithms used in distributed storage.
On Wed, Dec 12, 2007 at 12:12:47PM +0300, Dmitry Monakhov ([EMAIL PROTECTED]) wrote: On 14:47 Mon 10 Dec , Evgeniy Polyakov wrote: Algorithms used in distributed storage. Mirror and linear mapping code. Hi, i've finally take a look on your DST solution. It seems what your current implementation will not work on nonstandard devices for example software raid0. other comments are follows: +static int dst_mirror_process_node_data(struct dst_node *n, + struct dst_mirror_node_data *ndata, int op) + + kunmap(cmp-page); MINOR_BUG: You has forgot to unmap page on error path, so IMHO it is better to move kunmap to err_out_free_cmp label. Yep, I will fix this. + priv = kzalloc(sizeof(struct dst_mirror_priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv-chunk_num = st-disk_size; + + priv-chunk = vmalloc(DIV_ROUND_UP(priv-chunk_num, BITS_PER_LONG) * sizeof(long)); Ohhh. My. I want to add my 500G hdd. Do you really wanna say what i have to store 128Mb in memory object for this. Right now yes. There was a code which used single bit for bigger data units, but I dropped it because of resync troubles (i.e. when one single sector has been updated, it requires to resync the whole block). I can not say which case is better though. + dprintk(%s: start: %llu, size: %llu/%u, bio: %p, req: %p, + node: %p.\n, + __func__, req-start, req-size, nr_pages, bio, + req, req-node); + + err = n-st-queue-make_request_fn(n-st-queue, bio); Why direct make_request_fn instead of generic_make_request? generic_make_request() will queue the bio in this case, so I call request_fn directly. + for (i = 0; i DIV_ROUND_UP(priv-chunk_num, BITS_PER_LONG); ++i) { + int bit, num, start; + unsigned long word = priv-chunk[i]; + + if (!word) + continue; + + num = 0; + start = -1; + while (word num BITS_PER_LONG) { + bit = __ffs(word); + if (start == -1) + start = bit; + num++; MINOR_BUG: Seems you have misstyped here. 
AFAIU @num represent position of last non zero bit (start + num == last_non_zero_bit_pos) if (start == -1) { start = bit; num = 1; } else num += bit; Yes, you are right of course. Since I shift word to more than a single bit, @num has to be update accordingly. + word = (bit+1); Dmitry, thanks a lot for comments, I will fix issues you pointed in the next release, although will stay bitmap case opened for a while. -- Evgeniy Polyakov -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [4/4] DST: Algorithms used in distributed storage.
On 14:47 Mon 10 Dec , Evgeniy Polyakov wrote: Algorithms used in distributed storage. Mirror and linear mapping code. Hi, i've finally take a look on your DST solution. It seems what your current implementation will not work on nonstandard devices for example software raid0. other comments are follows: Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] diff --git a/drivers/block/dst/alg_linear.c b/drivers/block/dst/alg_linear.c new file mode 100644 index 000..9dc0976 --- /dev/null +++ b/drivers/block/dst/alg_linear.c @@ -0,0 +1,105 @@ +/* + * 2007+ Copyright (c) Evgeniy Polyakov [EMAIL PROTECTED] + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include linux/module.h +#include linux/kernel.h +#include linux/init.h +#include linux/dst.h + +static struct dst_alg *alg_linear; + +/* + * This callback is invoked when node is removed from storage. + */ +static void dst_linear_del_node(struct dst_node *n) +{ +} + +/* + * This callback is invoked when node is added to storage. 
+ */ +static int dst_linear_add_node(struct dst_node *n) +{ + struct dst_storage *st = n-st; + + dprintk(%s: disk_size: %llu, node_size: %llu.\n, + __func__, st-disk_size, n-size); + + mutex_lock(st-tree_lock); + n-start = st-disk_size; + st-disk_size += n-size; + set_capacity(st-disk, st-disk_size); + mutex_unlock(st-tree_lock); + + return 0; +} + +static int dst_linear_remap(struct dst_request *req) +{ + int err; + + if (req-node-bdev) { + generic_make_request(req-bio); + return 0; + } + + err = kst_check_permissions(req-state, req-bio); + if (err) + return err; + + return req-state-ops-push(req); +} + +/* + * Failover callback - it is invoked each time error happens during + * request processing. + */ +static int dst_linear_error(struct kst_state *st, int err) +{ + if (err) + set_bit(DST_NODE_FROZEN, st-node-flags); + else + clear_bit(DST_NODE_FROZEN, st-node-flags); + return 0; +} + +static struct dst_alg_ops alg_linear_ops = { + .remap = dst_linear_remap, + .add_node = dst_linear_add_node, + .del_node = dst_linear_del_node, + .error = dst_linear_error, + .owner = THIS_MODULE, +}; + +static int __devinit alg_linear_init(void) +{ + alg_linear = dst_alloc_alg(alg_linear, alg_linear_ops); + if (!alg_linear) + return -ENOMEM; + + return 0; +} + +static void __devexit alg_linear_exit(void) +{ + dst_remove_alg(alg_linear); +} + +module_init(alg_linear_init); +module_exit(alg_linear_exit); + +MODULE_LICENSE(GPL); +MODULE_AUTHOR(Evgeniy Polyakov [EMAIL PROTECTED]); +MODULE_DESCRIPTION(Linear distributed algorithm.); diff --git a/drivers/block/dst/alg_mirror.c b/drivers/block/dst/alg_mirror.c new file mode 100644 index 000..3c457ff --- /dev/null +++ b/drivers/block/dst/alg_mirror.c @@ -0,0 +1,1128 @@ +/* + * 2007+ Copyright (c) Evgeniy Polyakov [EMAIL PROTECTED] + * All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include linux/module.h +#include linux/kernel.h +#include linux/init.h +#include linux/poll.h +#include linux/dst.h + +struct dst_mirror_node_data +{ + u64 age; +}; + +struct dst_mirror_priv +{ + unsigned intchunk_num; + + u64 last_start; + + spinlock_t backlog_lock; + struct list_headbacklog_list; + + struct dst_mirror_node_data old_data, new_data; + + unsigned long *chunk; +}; + +static struct dst_alg *alg_mirror; +static struct bio_set *dst_mirror_bio_set; + +static int dst_mirror_resync(struct dst_node *n, int ndp); + +static void dst_mirror_mark_sync(struct dst_node *n) +{ + if (test_bit(DST_NODE_NOTSYNC, n-flags)) { +
[PATCH 2.6.25] netns: network namespace was passed into dev_getbyhwaddr but not used
netns: network namespace was passed into dev_getbyhwaddr but not used Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] diff --git a/net/core/dev.c b/net/core/dev.c index 06615df..677f35b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -675,7 +675,7 @@ struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *h ASSERT_RTNL(); - for_each_netdev(init_net, dev) + for_each_netdev(net, dev) if (dev-type == type !memcmp(dev-dev_addr, ha, dev-addr_len)) return dev; -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2.6.25] IPv4: thresholds in fib_trie.c are not modified, make them const
IPv4: thresholds in fib_trie.c are not modified, make them const Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 45a6e4d..57a2c46 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -299,10 +299,10 @@ static inline void check_tnode(const struct tnode *tn) WARN_ON(tn tn-pos+tn-bits 32); } -static int halve_threshold = 25; -static int inflate_threshold = 50; -static int halve_threshold_root = 15; -static int inflate_threshold_root = 25; +static const int halve_threshold = 25; +static const int inflate_threshold = 50; +static const int halve_threshold_root = 15; +static const int inflate_threshold_root = 25; static void __alias_free_mem(struct rcu_head *head) { -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [kvm-devel] [PATCH resent] virtio_net: Fix stalled inbound traffic on early packets
Am Mittwoch, 12. Dezember 2007 schrieb Dor Laor: This is why initially I suggested another status code in order to split the ring logic with driver status. but also not filling any buffers as long as VIRTIO_CONFIG_DEV_OPEN is not set. I will have a look but I think that add_status needs to be called It can fill the buffers even without dev_open, when the dev is finally opened the host will issue an interrupt if there are pending buffers. There is a problem associated with that scheme. Setting the value in the config space does not notify the hypervisor. That means the host will not send an interrupt. The interrupt is sent if the host tries to send the next packet. If somehow the host manages to use up all buffers before the device is finally opened, we have a problem. (I'm not sure it's worth solving, maybe just drop them like you suggested). As said above, dropping seems to me the preferred method. Did I miss something? Christian -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [kvm-devel] [PATCH resent] virtio_net: Fix stalled inbound traffic on early packets
Am Mittwoch, 12. Dezember 2007 schrieb Rusty Russell: On Wednesday 12 December 2007 00:16:12 Christian Borntraeger wrote: That would also work. We already have VRING_AVAIL_F_NO_INTERRUPT in virtio_ring.c - maybe we can use that. Its hidden in callback and restart handling, what about adding an explicit startup? Yes, I debated whether to make this a separate hook or not; the current method reduces the number of function calls without having two ways of disabling callbacks. In this case, simply starting devices with callbacks disabled and renaming 'restart' to 'enable' (or something) and calling it at the beginning is probably sufficient? So you suggest something like the following patch? It seems to work but there is still a theoretical race at startup. Therefore, I tend to agree with Dor that a separate hook seems prefereable, so I am not fully sure if the patch is the final solution: ps: Its ok to answer that after your vacation. --- drivers/block/virtio_blk.c |3 ++- drivers/net/virtio_net.c |5 - drivers/virtio/virtio_ring.c |9 - include/linux/virtio.h |4 ++-- 4 files changed, 12 insertions(+), 9 deletions(-) Index: kvm/drivers/virtio/virtio_ring.c === --- kvm.orig/drivers/virtio/virtio_ring.c +++ kvm/drivers/virtio/virtio_ring.c @@ -220,7 +220,7 @@ static void *vring_get_buf(struct virtqu return ret; } -static bool vring_restart(struct virtqueue *_vq) +static bool vring_enable(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -264,7 +264,7 @@ static struct virtqueue_ops vring_vq_ops .add_buf = vring_add_buf, .get_buf = vring_get_buf, .kick = vring_kick, - .restart = vring_restart, + .enable = vring_enable, .shutdown = vring_shutdown, }; @@ -299,9 +299,8 @@ struct virtqueue *vring_new_virtqueue(un vq-in_use = false; #endif - /* No callback? Tell other side not to bother us. 
*/ - if (!callback) - vq-vring.avail-flags |= VRING_AVAIL_F_NO_INTERRUPT; + /* disable interrupts until we enable them */ + vq-vring.avail-flags |= VRING_AVAIL_F_NO_INTERRUPT; /* Put everything in free lists. */ vq-num_free = num; Index: kvm/include/linux/virtio.h === --- kvm.orig/include/linux/virtio.h +++ kvm/include/linux/virtio.h @@ -41,7 +41,7 @@ struct virtqueue * vq: the struct virtqueue we're talking about. * len: the length written into the buffer * Returns NULL or the data token handed to add_buf. - * @restart: restart callbacks after callback returned false. + * @enable: restart callbacks after callback returned false. * vq: the struct virtqueue we're talking about. * This returns false (and doesn't re-enable) if there are pending * buffers in the queue, to avoid a race. @@ -65,7 +65,7 @@ struct virtqueue_ops { void *(*get_buf)(struct virtqueue *vq, unsigned int *len); - bool (*restart)(struct virtqueue *vq); + bool (*enable)(struct virtqueue *vq); void (*shutdown)(struct virtqueue *vq); }; Index: kvm/drivers/net/virtio_net.c === --- kvm.orig/drivers/net/virtio_net.c +++ kvm/drivers/net/virtio_net.c @@ -201,7 +201,7 @@ again: /* Out of packets? 
*/ if (received budget) { netif_rx_complete(vi-dev, napi); - if (unlikely(!vi-rvq-vq_ops-restart(vi-rvq)) + if (unlikely(!vi-rvq-vq_ops-enable(vi-rvq)) netif_rx_reschedule(vi-dev, napi)) goto again; } @@ -292,6 +292,9 @@ static int virtnet_open(struct net_devic return -ENOMEM; napi_enable(vi-napi); + + vi-rvq-vq_ops-enable(vi-rvq); + vi-svq-vq_ops-enable(vi-svq); return 0; } Index: kvm/drivers/block/virtio_blk.c === --- kvm.orig/drivers/block/virtio_blk.c +++ kvm/drivers/block/virtio_blk.c @@ -183,7 +183,8 @@ static int virtblk_probe(struct virtio_d err = PTR_ERR(vblk-vq); goto out_free_vblk; } - + /* enable interrupts */ + vblk-vq-vq_ops-enable(vblk-vq); vblk-pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); if (!vblk-pool) { err = -ENOMEM; -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] [net/wireless/iwlwifi] : iwlwifi 3945 Fix race conditional panic.
On Thu, 29 Nov 2007 10:42:49 +0900 Joonwoo Park [EMAIL PROTECTED] wrote: 2007/11/29, Zhu Yi [EMAIL PROTECTED]: Good catch. But it will be better if you add it into iwl_cancel_deferred_work(). Thanks. I agree with you. Actually, I considered it, but I was afraid of side effects. Anyway, I'm attaching a new one. Thanks. Joonwoo [net/wireless/iwlwifi] : iwlwifi 3945 Fix race conditional panic. Signed-off-by: Joonwoo Park [EMAIL PROTECTED] --- diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 465da4f..e51e872 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -8270,6 +8270,7 @@ static void iwl_cancel_deferred_work(struct iwl_priv *priv) { iwl_hw_cancel_deferred_work(priv); + cancel_delayed_work_sync(priv-init_alive_start); cancel_delayed_work(priv-scan_check); cancel_delayed_work(priv-alive_start); cancel_delayed_work(priv-post_associate); Did drivers/net/wireless/iwlwifi/iwl4965-base.c get fixed in a similar fashion? -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] iproute2: support dotted-quad netmask notation.
On Tue, Dec 11, 2007 at 05:14:06PM -0800, Stephen Hemminger wrote: On Sun, 09 Dec 2007 18:10:22 +0100 Andreas Henriksson [EMAIL PROTECTED] wrote: I think both previous patches were broken on big-endian platforms. Here's an updated patch again. I'm very sorry for the inconvenience! [...] + *val=0; + for (mask = ntohl(addr.data[0]); mask; mask = 1) + (*val)++; [...] applied Just to make sure It looks on git.kernel.org like you applied the wrong patch. (Maybe you just haven't pushed out the latest changes there yet.) Please double-check that you actually applied the latest version (which is the one in the mail you replied "applied" to, important part quoted above). Additionally, there's still a couple of trivial patches pending in the patches branch of git://git.debian.org/git/collab-maint/pkg-iproute.git Please see the original thread[1], where Patrick McHarding had some concerns about one of the patches. It's exactly the same changes you made in commit 660818498d0f5a3f52c05355a3e82c23f670fcc1 [2] though, so I don't really see the problem. I have an additional patch[3] available, that makes MAX_ROUNDS configurable which Patrick requested. Please comment on the way forward there [1]: http://www.spinics.net/lists/netdev/msg44800.html [2]: Where the comment seems to be wrong about Limit ip route flush..., since it's actually ip neigh flush that's being modified. [3]: I have a slightly updated patch, but it's basically the same as http://www.spinics.net/lists/netdev/msg45080.html Will send updated version if the patches it's based on go in. -- Regards, Andreas Henriksson -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[virtio-net][PATCH] Don't arm tx hrtimer with a constant 500us each transmit
commit 763769621d271d92204ed27552d75448587c1ac0 Author: Dor Laor [EMAIL PROTECTED] Date: Wed Dec 12 14:52:00 2007 +0200 [virtio-net][PATCH] Don't arm tx hrtimer with a constant 500us each transmit The current start_xmit sets 500us hrtimer to kick the host. The problem is that if another xmit happens before the timer was fired then the first xmit will have to wait an additional 500us. This patch does not re-arm the timer if there is an existing one. This will shorten the latency for tx. Signed-off-by: Dor Laor [EMAIL PROTECTED] diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7b051d5..8bb17d1 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -406,10 +405,10 @@ again: virtio_debug(vdebug, %s: before calling kick %d\n, __FUNCTION__, __LINE__); vi-svq-vq_ops-kick(vi-svq); vi-out_num = 0; -} else { -vi-stats.hrtimer_starts++; -hrtimer_start(vi-tx_timer, ktime_set(0,50), - HRTIMER_MODE_REL); +} else if (!hrtimer_is_queued(vi-tx_timer)) { +vi-stats.hrtimer_starts++; +hrtimer_start(vi-tx_timer, ktime_set(0,50), + HRTIMER_MODE_REL); } return 0; } -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH] Increase virtual FIFOs in ucc_geth.
On Tue, 2007-12-11 at 19:51 +0800, Li Yang wrote: -Original Message- From: Joakim Tjernlund [mailto:[EMAIL PROTECTED] Sent: Tuesday, December 11, 2007 6:58 PM To: Li Yang Cc: netdev@vger.kernel.org Subject: RE: [PATCH] Increase virtual FIFOs in ucc_geth. On Tue, 2007-12-11 at 11:11 +0100, Joakim Tjernlund wrote: On Tue, 2007-12-11 at 17:49 +0800, Li Yang wrote: -Original Message- From: Joakim Tjernlund [mailto:[EMAIL PROTECTED] Sent: Tuesday, December 11, 2007 2:46 AM To: Li Yang-r58472 [EMAIL PROTECTED] Netdev Cc: Joakim Tjernlund Subject: [PATCH] Increase virtual FIFOs in ucc_geth. Increase UCC_GETH_URFS_INIT to 1152 and UCC_GETH_UTFS_INIT to 896 to avoid HW Overrun/Underrun. Please be noted that these values are only used for 10/100Mbps speed. Did you get Overrun in 10/100M mode? I get both TX Underrun and RX overrun in 100Mbps, FD, just by running a tftp transfer. It feels like the URFET and/or URSFET isn't working. Why I don't know. CPU is MPC832x Jocke I am a bit confused how the RBMR and TBMR is supposed to work. In ucc_get there is: out_be32(ugeth-p_tx_glbl_pram-tstate, ((u32) function_code) 24); ugeth-p_rx_glbl_pram-rstate = function_code; First, should not the rx part look the same as tx? To be consist with the chip RM, type for tstate is u32 and type for rstate is u8. Personally I don't think that it will be different to access tstate as u8, but it will be more readable to be the same as manual. Well, rstate probably should be changed to use IO accessor too. Does programing rstate/tstate replace RBMR and TBMR? RBMR and TBMR? These are for other protocols. - Leo Noticed that the number of threads(numThreadsTx and numThreadsRx) is set to 4. That is a good value for GBE, but the manual thinks 1 is good for 10/100 Mbps. How does 4 threads affect the FIFO for 100 Mbps? I can't test ATM as the system is busy with other things. 
Jocke -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [kvm-devel] [virtio-net][PATCH] Don't arm tx hrtimer with a constant 500us each transmit
Am Mittwoch, 12. Dezember 2007 schrieb Dor Laor: --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -406,10 +405,10 @@ again: Hmm, while I agree in general with the patch, I fail to find the proper version of virtio_net where this patch applies. I tried kvm.git and linux-2.6.git from kernel.org. Can you give me a pointer to the repository where you work on virtio? Christian -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: DM9000_IRQ_FLAGS
Hi Remy, On Tue, Dec 11, 2007 at 09:31:03PM +0100, Remy Bohmer wrote: This controller is also used on many other boards, e.g. the Atmel AT91sam9261-ek board. On that board on both the rising _and_ falling edge an interrupt is generated. However, request_irq() is called with IRQF_SHARED only, so neither IRQT_RISING nor IRQT_FALLING is set and the value defaults to IRQT_NOEDGE. How can you get IRQs? I can test tomorrow if this patch leaves this board intact, but should the board-specific code not add this flag if it is required? By modifying this driver you will interfere with the behavior of other boards, and I do not know if there are any level-triggered types used. Actually, the best way to go is to let the platform resources flags decide about that with something like resource-flags = IORESOURCE_IRQ | IRQT_RISING; but the dm9000 does not care about them at all. Changing that would also imply modifications to all board support code. Regards, Daniel -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [IPSEC]: Add populate from packet (PFP) support
On Tue, Dec 11, 2007 at 07:23:52PM -0800, Tyler Hicks wrote: RFC 4301 requires us to associate each SPD entry with a set of flags to determine how to assign the selector values when creating a new SAD entry. Each selector in the new xfrm_state can either be assigned using the corresponding selector in the xfrm_policy or with the corresponding value in the flowi. Prior to this patch, the fields in the flowi were always used. Signed-off-by: Tyler Hicks [EMAIL PROTECTED] Thanks for the patch Tyler! I think the kernel is fine as it is. What we're doing is generating the most specific selector possible for the larval SA and which lets the KM do whatever it wants. What RFC 4301 is asking for is for the mature SAs to have their selectors either populated from the policy or the packet. So for PFP the KM should fill out its SA selector according to its PFP flags. In other words we don't need PFP flags in the kernel at all. + if (pol-flags XFRM_POLICY_PFP_SPORT) { + x-sel.sport = xfrm_flowi_sport(fl); + x-sel.sport_mask = htons(0x); + } else { + x-sel.sport = pol-selector.sport; + x-sel.sport = pol-selector.sport_mask; There's a typo here. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Bugme-new] [Bug 9543] New: RTNL: assertion failed at net/ipv6/addrconf.c (2164)/RTNL: assertion failed at net/ipv4/devinet.c (1055)
Andrew Morton [EMAIL PROTECTED] wrote: From: Andrew Morton [EMAIL PROTECTED] Remove stray rtnl_unlock(). Addresses http://bugzilla.kernel.org/show_bug.cgi?id=9542 Andrew, please cc Jay Vosburgh [EMAIL PROTECTED] on bonding issues. diff -puN drivers/net/bonding/bond_sysfs.c~bonding-locking-fix drivers/net/bonding/bond_sysfs.c --- a/drivers/net/bonding/bond_sysfs.c~bonding-locking-fix +++ a/drivers/net/bonding/bond_sysfs.c @@ -,8 +,6 @@ static ssize_t bonding_store_primary(str out: write_unlock_bh(bond-lock); - rtnl_unlock(); - Looking at the changeset that added this, perhaps the intention is to hold the lock? If so we should add an rtnl_lock to the start of the function. Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [kvm-devel] [virtio-net][PATCH] Don't arm tx hrtimer with a constant 500us each transmit
Christian Borntraeger wrote: Am Mittwoch, 12. Dezember 2007 schrieb Dor Laor: --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -406,10 +405,10 @@ again: Hmm, while I agree in general with the patch, I fail to find the proper version of virtio_net where this patch applies. I tried kvm.git and linux-2.6.git from kernel.org. Can you give me a pointer to the repository where you work on virtio? Sorry for that, I added some debug prints of my own. Here it is: git clone git://kvm.qumranet.com/home/dor/src/linux-2.6-nv use branch 'virtio'. BTW: what git repository do you use? This patch improves my tx performance from 720Mbps to 900Mbps. Dor Christian -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Bug 9542] BUG: bad unlock balance detected!
On Tue, Dec 11, 2007 at 10:06:14PM +, Andrew Morton wrote: events/0/9 just changed the state of lock: (mc-mca_lock){-+..}, at: [c0412602] mld_ifc_timer_expire+0x130/0x1fb but this lock took another, soft-read-irq-unsafe lock in the past: (bond-lock){-.--} and interrupts could create inverse lock ordering between them. Yes this is a bug in the bonding driver. It's assuming that bond-lock is only ever held for writing in process context. Unfortunately our current set_multicast_list interface violates this constraint. Now I do have a TODO item to fix set_multicast_list to not do that, but until that happens, bond-lock should always turn BH off, even on read lock. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] [net/wireless/iwlwifi] : iwlwifi 3945 Fix raceconditional panic.
2007/12/12, Andrew Morton [EMAIL PROTECTED]: Did drivers/net/wireless/iwlwifi/iwl4965-base.c get fixed in a similar fashion? Hi Andrew, Both of them were applied to mainline. commit 3ae6a054553ee8b7f74bf7de8904022b26705778 Author: Joonwoo Park [EMAIL PROTECTED] Date: Thu Nov 29 10:43:16 2007 +0900 iwlwifi 4965 Fix race conditional panic. Signed-off-by: Joonwoo Park [EMAIL PROTECTED] Signed-off-by: John W. Linville [EMAIL PROTECTED] commit e47eb6ad41e8fc4c2696665512b70d1fd4cf3f22 Author: Joonwoo Park [EMAIL PROTECTED] Date: Thu Nov 29 10:42:49 2007 +0900 iwlwifi 3945 Fix race conditional panic. Signed-off-by: Joonwoo Park [EMAIL PROTECTED] Signed-off-by: John W. Linville [EMAIL PROTECTED] Thanks, Joonwoo -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 7/7] [NETDEV]: myri10ge Fix possible causing oops of net_rx_action
2007/12/12, Subrata Modak [EMAIL PROTECTED]: Would you like to submit the Driver Test Cases to LTP ? Or, if you have your Driver test cases written in C, we can port to LTP and have it inside the LTP package. You can also check out the following links for more info, and can always ping me for anything: http://ltp.cvs.sourceforge.net/ltp/ltp/testcases/kernel/device-drivers/, http://ltp.sourceforge.net/documentation/how-to/ltp.php? Regards-- Subrata (LTP Maintainer) Subrata, I'm not sure it's possible, since my test needed some specific hardware, but I'll try it in the near future and contact you. Thanks. Joonwoo -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Remove unused define from loopback driver
The LOOPBACK_OVERHEAD is not used in this file at all. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] --- diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 662b8d1..3d1c743 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -64,8 +64,6 @@ struct pcpu_lstats { unsigned long bytes; }; -#define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16) - /* KISS: just allocate small chunks and copy bits. * * So, in fact, this is documentation, explaining what we expect -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: DM9000_IRQ_FLAGS
Hello Daniel, However, request_irq() is called with IRQF_SHARED only, so neither IRQT_RISING nor IRQT_FALLING is set and the value defaults to IRQT_NOEDGE. How can you get IRQs? The DM9000 adapter is connected to a GPIO line on the at91sam9261-ek board, the board-specific code has a generic handler for every GPIO interrupt. So, the Device driver __does not need to know__ the type of interrupt, It just installs a handler by request_irq(). So, these flags are dependent on the board where the driver is installed, and thus everything you want to define fixed in the driver is not generic and thus wrong. This is why I NACK your patch. Actually, the best way to go is to let the platform resources flags decide about that with something like resource->flags = IORESOURCE_IRQ | IRQT_RISING; These types of flags are never meant to be compatible, and thus they shall not be stored in the same flags variable. The same is valid for the flags IRQF_* and IRQT* flags, so request_irq() is also not the place to do this. You should look at the routine set_irq_type() in kernel/irq/chip.c. This routine is supposed to be used for the IRQT_RISING type of flags. Just do a grep of IRQT_RISING in the arch/arm tree and you will find enough examples. Kind Regards, Remy -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: DM9000_IRQ_FLAGS
Hi, On Dec 12, 2007, at 3:46 PM, Remy Bohmer wrote: The DM9000 adapter is connected to a GPIO line on the at91sam9261-ek board, the board-specific code has a generic handler for every GPIO interrupt. So, the Device driver __does not need to know__ the type of interrupt, It just installs a handler by request_irq(). Ok. resource->flags = IORESOURCE_IRQ | IRQT_RISING; These types of flags are never meant to be compatible, and thus they shall not be stored in the same flags variable. The same is valid for the flags IRQF_* and IRQT* flags, so request_irq() is also not the place to do this. Ah, so I was trapped by faulty code doing that. I thought that, since the type of interrupt is also a detail of the resource, this was the place to put it. However, it also works with set_irq_type(). Thanks for sorting that out :) Best regards, Daniel -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] net: napi fix
From: Stephen Hemminger [EMAIL PROTECTED] Date: Tue, 11 Dec 2007 21:46:34 -0800 Isn't this a better fix for all drivers, rather than peppering every driver with the special case. This is how the logic worked up until 2.6.24. Stephen this is not the problem. The problem is that the driver is doing a NAPI completion and re-enabling chip interrupts with work_done == weight, and that is illegal. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 6/7] [NETDEV]: tehuti Fix possible causing oops of net_rx_action
From: Stephen Hemminger [EMAIL PROTECTED] Date: Tue, 11 Dec 2007 21:39:39 -0800 On Wed, 12 Dec 2007 13:01:27 +0900 Joonwoo Park [EMAIL PROTECTED] wrote: [NETDEV]: tehuti Fix possible causing oops of net_rx_action Signed-off-by: Joonwoo Park [EMAIL PROTECTED] --- drivers/net/tehuti.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index 21230c9..955e749 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -305,6 +305,8 @@ static int bdx_poll(struct napi_struct *napi, int budget) netif_rx_complete(dev, napi); bdx_enable_interrupts(priv); + if (unlikely(work_done == napi-weight)) + return work_done - 1; } return work_done; } A better fix would be not going over budget in the first place. That's not the problem. They are not going over the budget, rather, they are hitting the budget yet doing netif_rx_complete() as well which is illegal. Unless you strictly process less than weight packets, you must not netif_rx_complete() and re-enable chip interrupts. I can't believe people are trying to fix this bug like this. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 6/7] [NETDEV]: tehuti Fix possible causing oops of net_rx_action
From: Joonwoo Park [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 13:01:27 +0900 @@ -305,6 +305,8 @@ static int bdx_poll(struct napi_struct *napi, int budget) netif_rx_complete(dev, napi); bdx_enable_interrupts(priv); + if (unlikely(work_done == napi-weight)) + return work_done - 1; } return work_done; } Any time you're trying to make a caller happy by adjusting a return value forcefully, it's a hack. And I stated this in another reply about this issue. Please do not fix the problem this way. The correct way to fix this is, if we did process a full weight of work, we should not netif_rx_complete() and we should not re-enable chip interrupts. Instead we should return the true work_done value and allow the caller to thus poll us one more time. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [NET]: Fix Ooops of napi net_rx_action.
From: Brandeburg, Jesse [EMAIL PROTECTED] Date: Tue, 11 Dec 2007 16:38:37 -0800 @@ -3933,6 +3933,10 @@ quit_polling: e1000_set_itr(adapter); netif_rx_complete(poll_dev, napi); e1000_irq_enable(adapter); + if (work_done == weight) + return work_done - 1; + else + return work_done; Don't do this. If you processed weight worth of packets, return that exact value and do not netif_rx_complete() and do not re-enable interrupts. That is the only correct fix. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH net-2.6.25 uncompilable] [TCP]: Avoid breaking GSOed skbs when SACKed one-by-one
From: Lachlan Andrew [EMAIL PROTECTED] Date: Tue, 11 Dec 2007 16:14:36 -0800 This thread started because TCP processing interferes with RTT estimation. This problem would be eliminated if time-stamping were done as soon as the packet comes off the NIC. We don't do that because such timestamping is too expensive. It used to be the case that we did this, but we stopped doing that a long time ago. On x86 for example, timestamping can involve touching a slow I/O device to read the timestamp. We do not want to do that for every packet. Also, we timestamp differently for TCP, the global high resolution timestamp is overkill for this purpose. Really, this is a silly idea and would only be a bandaid for the problem at hand, that TCP input processing is too expensive in certain circumstances. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PULL] SCTP bug fixes for net-2.6
Jijo Chacko wrote: Hello experts, [ sorry if my questions are irrelevent to any one of you ]. I am trying to develop a frame work, where i transport all HTTP traffic over SCTP. On user space, i can do this easily over SCTP, BSD-style sockets. If i am to write a kernel module(say in-kernel web client/server) using kernel version of SCTP interfaces, what are the available calls now supported ? I see in-kernel implementation like NFS/RPC make kernel version of UDP and TCP _sendmsg, i don't find similar interfaces for SCTP.. Look at fs/dlm/lowcomms.c All you need to do is use IPPROTO_SCTP when creating a socket in the kernel and then I believe you can use all the same calls as TCP/UDP. -vlad -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] qe: add function qe_clock_source()
Timur Tabi wrote: Add function qe_clock_source() which takes a string containing the name of a QE clock source (as is typically found in device trees) and returns the matching enum qe_clock value. Update booting-without-of.txt to indicate that the UCC properties rx-clock and tx-clock are deprecated and replaced with rx-clock-name and tx-clock-name, which use strings instead of numbers to indicate QE clock sources. Signed-off-by: Timur Tabi [EMAIL PROTECTED] If there are no objections, I'd like this patch to be pulled into 2.6.25. Thanks. -- Timur Tabi Linux kernel developer at Freescale -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] ucc_geth: use rx-clock-name and tx-clock-name device tree properties
Timur Tabi wrote: Updates the ucc_geth device driver to check the new rx-clock-name and tx-clock-name properties first. If present, it uses the new function qe_clock_source() to obtain the clock source. Otherwise, it checks the deprecated rx-clock and tx-clock properties. Update the device trees for 832x, 836x, and 8568 to contain the new property names only. Signed-off-by: Timur Tabi [EMAIL PROTECTED] If there are no objections, I'd like this patch to be pulled into 2.6.25. Thanks. -- Timur Tabi Linux kernel developer at Freescale -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] net: napi fix
From: Joonwoo Park [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 15:05:26 +0900 Could you explain how it fix the problem? IMHO I think your patch cannot solve the problem. The drivers can call netif_rx_complete and net_rx_action can do list_move_tail also. Stephen is confused about what the bug is in these drivers, that's all. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] IPIP: Allow rebinding the tunnel to another interface
Hello, Once created, an IP tunnel can't be bound to another device. (reported as https://bugzilla.redhat.com/show_bug.cgi?id=419671) To reproduce: # create a tunnel: ip tunnel add tunneltest0 mode ipip remote 10.0.0.1 dev eth0 # try to change the bounding device from eth0 to eth1: ip tunnel change tunneltest0 dev eth1 # show the result: ip tunnel show tunneltest0 tunneltest0: ip/ip remote 10.0.0.1 local any dev eth0 ttl inherit Notice the bound device has not changed from eth0 to eth1. This patch fixes it. When changing the binding, it also recalculates the MTU according to the new bound device's MTU. If the change is acceptable, I'll do the same for GRE and SIT tunnels. Signed-off-by: Michal Schmidt [EMAIL PROTECTED] diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 8c2b2b0..05b267b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -651,6 +651,40 @@ tx_error: return 0; } +static void ipip_tunnel_bind_dev(struct net_device *dev) +{ + struct net_device *tdev = NULL; + struct ip_tunnel *tunnel; + struct iphdr *iph; + + tunnel = netdev_priv(dev); + iph = tunnel-parms.iph; + + if (iph-daddr) { + struct flowi fl = { .oif = tunnel-parms.link, + .nl_u = { .ip4_u = + { .daddr = iph-daddr, + .saddr = iph-saddr, + .tos = RT_TOS(iph-tos) } }, + .proto = IPPROTO_IPIP }; + struct rtable *rt; + if (!ip_route_output_key(rt, fl)) { + tdev = rt-u.dst.dev; + ip_rt_put(rt); + } + dev-flags |= IFF_POINTOPOINT; + } + + if (!tdev tunnel-parms.link) + tdev = __dev_get_by_index(init_net, tunnel-parms.link); + + if (tdev) { + dev-hard_header_len = tdev-hard_header_len + sizeof(struct iphdr); + dev-mtu = tdev-mtu - sizeof(struct iphdr); + } + dev-iflink = tunnel-parms.link; +} + static int ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -723,6 +757,10 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t-parms.iph.ttl = p.iph.ttl; t-parms.iph.tos = p.iph.tos; t-parms.iph.frag_off = p.iph.frag_off; + if (t-parms.link != p.link) { + 
t-parms.link = p.link; + ipip_tunnel_bind_dev(dev); + } } if (copy_to_user(ifr-ifr_ifru.ifru_data, t-parms, sizeof(p))) err = -EFAULT; @@ -791,12 +829,9 @@ static void ipip_tunnel_setup(struct net_device *dev) static int ipip_tunnel_init(struct net_device *dev) { - struct net_device *tdev = NULL; struct ip_tunnel *tunnel; - struct iphdr *iph; tunnel = netdev_priv(dev); - iph = tunnel-parms.iph; tunnel-dev = dev; strcpy(tunnel-parms.name, dev-name); @@ -804,29 +839,7 @@ static int ipip_tunnel_init(struct net_device *dev) memcpy(dev-dev_addr, tunnel-parms.iph.saddr, 4); memcpy(dev-broadcast, tunnel-parms.iph.daddr, 4); - if (iph-daddr) { - struct flowi fl = { .oif = tunnel-parms.link, - .nl_u = { .ip4_u = - { .daddr = iph-daddr, - .saddr = iph-saddr, - .tos = RT_TOS(iph-tos) } }, - .proto = IPPROTO_IPIP }; - struct rtable *rt; - if (!ip_route_output_key(rt, fl)) { - tdev = rt-u.dst.dev; - ip_rt_put(rt); - } - dev-flags |= IFF_POINTOPOINT; - } - - if (!tdev tunnel-parms.link) - tdev = __dev_get_by_index(init_net, tunnel-parms.link); - - if (tdev) { - dev-hard_header_len = tdev-hard_header_len + sizeof(struct iphdr); - dev-mtu = tdev-mtu - sizeof(struct iphdr); - } - dev-iflink = tunnel-parms.link; + ipip_tunnel_bind_dev(dev); return 0; } -- 1.5.3.3 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [kvm-devel] [PATCH resent] virtio_net: Fix stalled inbound trafficon early packets
Christian Borntraeger wrote: Am Mittwoch, 12. Dezember 2007 schrieb Rusty Russell: On Wednesday 12 December 2007 00:16:12 Christian Borntraeger wrote: That would also work. We already have VRING_AVAIL_F_NO_INTERRUPT in virtio_ring.c - maybe we can use that. Its hidden in callback and restart handling, what about adding an explicit startup? Yes, I debated whether to make this a separate hook or not; the current method reduces the number of function calls without having two ways of disabling callbacks. In this case, simply starting devices with callbacks disabled and renaming 'restart' to 'enable' (or something) and calling it at the beginning is probably sufficient? So you suggest something like the following patch? It seems to work but there is still a theoretical race at startup. Therefore, I tend to agree with Dor that a separate hook seems prefereable, so I am not fully sure if the patch is the final solution: I think the change below handles the race. Otherwise please detail the use case. ps: Its ok to answer that after your vacation. --- drivers/block/virtio_blk.c |3 ++- drivers/net/virtio_net.c |5 - drivers/virtio/virtio_ring.c |9 - include/linux/virtio.h |4 ++-- 4 files changed, 12 insertions(+), 9 deletions(-) Index: kvm/drivers/virtio/virtio_ring.c === --- kvm.orig/drivers/virtio/virtio_ring.c +++ kvm/drivers/virtio/virtio_ring.c @@ -220,7 +220,7 @@ static void *vring_get_buf(struct virtqu return ret; } -static bool vring_restart(struct virtqueue *_vq) +static bool vring_enable(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -264,7 +264,7 @@ static struct virtqueue_ops vring_vq_ops .add_buf = vring_add_buf, .get_buf = vring_get_buf, .kick = vring_kick, - .restart = vring_restart, + .enable = vring_enable, .shutdown = vring_shutdown, }; @@ -299,9 +299,8 @@ struct virtqueue *vring_new_virtqueue(un vq-in_use = false; #endif - /* No callback? Tell other side not to bother us. 
*/ - if (!callback) - vq-vring.avail-flags |= VRING_AVAIL_F_NO_INTERRUPT; + /* disable interrupts until we enable them */ + vq-vring.avail-flags |= VRING_AVAIL_F_NO_INTERRUPT; /* Put everything in free lists. */ vq-num_free = num; Index: kvm/include/linux/virtio.h === --- kvm.orig/include/linux/virtio.h +++ kvm/include/linux/virtio.h @@ -41,7 +41,7 @@ struct virtqueue * vq: the struct virtqueue we're talking about. * len: the length written into the buffer * Returns NULL or the data token handed to add_buf. - * @restart: restart callbacks after callback returned false. + * @enable: restart callbacks after callback returned false. * vq: the struct virtqueue we're talking about. * This returns false (and doesn't re-enable) if there are pending * buffers in the queue, to avoid a race. @@ -65,7 +65,7 @@ struct virtqueue_ops { void *(*get_buf)(struct virtqueue *vq, unsigned int *len); - bool (*restart)(struct virtqueue *vq); + bool (*enable)(struct virtqueue *vq); void (*shutdown)(struct virtqueue *vq); }; Index: kvm/drivers/net/virtio_net.c === --- kvm.orig/drivers/net/virtio_net.c +++ kvm/drivers/net/virtio_net.c @@ -201,7 +201,7 @@ again: /* Out of packets? */ if (received budget) { netif_rx_complete(vi-dev, napi); - if (unlikely(!vi-rvq-vq_ops-restart(vi-rvq)) + if (unlikely(!vi-rvq-vq_ops-enable(vi-rvq)) netif_rx_reschedule(vi-dev, napi)) goto again; } @@ -292,6 +292,9 @@ static int virtnet_open(struct net_devic return -ENOMEM; napi_enable(vi-napi); + + vi-rvq-vq_ops-enable(vi-rvq); + vi-svq-vq_ops-enable(vi-svq); If you change it to: if (!vi-rvq-vq_ops-enable(vi-rvq)) vi-rvq-vq_ops-kick(vi-rvq); if (!vi-rvq-vq_ops-enable(vi-svq)) vi-rvq-vq_ops-kick(vi-svq); You solve the race of packets already waiting in the queue without triggering the irq. The same for the block device. Regards, Dor. 
return 0; } Index: kvm/drivers/block/virtio_blk.c === --- kvm.orig/drivers/block/virtio_blk.c +++ kvm/drivers/block/virtio_blk.c @@ -183,7 +183,8 @@ static int virtblk_probe(struct virtio_d err = PTR_ERR(vblk-vq); goto out_free_vblk; } - + /* enable interrupts */ + vblk-vq-vq_ops-enable(vblk-vq); vblk-pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); if (!vblk-pool) { err = -ENOMEM; -- To unsubscribe from this list: send the line
Re: [PATCH] IPIP: Allow rebinding the tunnel to another interface
Michal Schmidt wrote: +static void ipip_tunnel_bind_dev(struct net_device *dev) +{ ... + dev-iflink = tunnel-parms.link; +} + static int ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -723,6 +757,10 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t-parms.iph.ttl = p.iph.ttl; t-parms.iph.tos = p.iph.tos; t-parms.iph.frag_off = p.iph.frag_off; + if (t-parms.link != p.link) { + t-parms.link = p.link; + ipip_tunnel_bind_dev(dev); + } If you change dev-iflink this should trigger a rtnetlink notification. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [IPROUTE]: Avoid overflow for larger rto_min in print_route
This includes a workaround for overflow while conversion of larger rto_min (e.g. 3s) unit. Signed-off-by: Satoru SATOH [EMAIL PROTECTED] ip/iproute.c |8 1 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ip/iproute.c b/ip/iproute.c index f4200ae..fa722c6 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -510,16 +510,16 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) fprintf(fp, %u, *(unsigned*)RTA_DATA(mxrta[i])); else { unsigned val = *(unsigned*)RTA_DATA(mxrta[i]); + unsigned hz1 = hz / 1000; - val *= 1000; if (i == RTAX_RTT) val /= 8; else if (i == RTAX_RTTVAR) val /= 4; - if (val = hz) - fprintf(fp, %ums, val/hz); + if (val = hz1) + fprintf(fp, %ums, val/hz1); else - fprintf(fp, %.2fms, (float)val/hz); + fprintf(fp, %.2fms, (float)val/hz1); } } } -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: ip neigh show not showing arp cache entries?
I retested it on an x86 machine and am seeing similar problems. First, arp gives the arp table as expected: [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 arp -n Address HWtype HWaddress Flags Mask Iface 172.24.0.9 ether 00:03:CC:51:06:5E C bond0 10.41.18.101 ether 00:0E:0C:5E:95:BD C eth6 172.24.137.0 ether 00:C0:8B:08:E4:88 C bond0 172.24.136.0 ether 00:C0:8B:07:B3:7E C bond0 10.41.18.1 ether 00:00:5E:00:01:01 C eth6 172.24.0.5 ether 00:01:AF:15:E0:6A C bond0 172.24.0.13 ether 00:0E:0C:85:FD:D2 C bond0 172.24.0.3 ether 00:01:AF:14:C8:CC C bond0 172.24.132.1 ether 00:01:AF:14:E9:88 C bond0 172.24.0.7 ether 00:07:E9:41:4B:B4 C bond0 192.168.24.81ether 00:01:AF:14:E9:8A C bond2 ip neigh show gives nothing, but if I search for specific addresses from the arp table listing they show up: [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 ip neigh show [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 ip neigh show 172.24.0.9 172.24.0.9 dev bond0 lladdr 00:03:cc:51:06:5e DELAY [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 ip neigh show 10.41.18.101 10.41.18.101 dev eth6 lladdr 00:0e:0c:5e:95:bd REACHABLE [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 ip neigh show 172.24.137.0 172.24.137.0 dev bond0 lladdr 00:c0:8b:08:e4:88 REACHABLE Is this expected behaviour? Thanks, Chris -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] IPIP: Allow rebinding the tunnel to another interface
On Wed, 12 Dec 2007 17:00:14 +0100 Patrick McHardy [EMAIL PROTECTED] wrote: If you change dev-iflink this should trigger a rtnetlink notification. OK, I've added netdev_state_change(dev). Here's the new patch. Once created, an IP tunnel can't be bound to another device. (reported as https://bugzilla.redhat.com/show_bug.cgi?id=419671) To reproduce: # create a tunnel: ip tunnel add tunneltest0 mode ipip remote 10.0.0.1 dev eth0 # try to change the bounding device from eth0 to eth1: ip tunnel change tunneltest0 dev eth1 # show the result: ip tunnel show tunneltest0 tunneltest0: ip/ip remote 10.0.0.1 local any dev eth0 ttl inherit Notice the bound device has not changed from eth0 to eth1. This patch fixes it. When changing the binding, it also recalculates the MTU according to the new bound device's MTU. If the change is acceptable, I'll do the same for GRE and SIT tunnels. Signed-off-by: Michal Schmidt [EMAIL PROTECTED] diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 8c2b2b0..160535b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -651,6 +651,40 @@ tx_error: return 0; } +static void ipip_tunnel_bind_dev(struct net_device *dev) +{ + struct net_device *tdev = NULL; + struct ip_tunnel *tunnel; + struct iphdr *iph; + + tunnel = netdev_priv(dev); + iph = tunnel-parms.iph; + + if (iph-daddr) { + struct flowi fl = { .oif = tunnel-parms.link, + .nl_u = { .ip4_u = + { .daddr = iph-daddr, + .saddr = iph-saddr, + .tos = RT_TOS(iph-tos) } }, + .proto = IPPROTO_IPIP }; + struct rtable *rt; + if (!ip_route_output_key(rt, fl)) { + tdev = rt-u.dst.dev; + ip_rt_put(rt); + } + dev-flags |= IFF_POINTOPOINT; + } + + if (!tdev tunnel-parms.link) + tdev = __dev_get_by_index(init_net, tunnel-parms.link); + + if (tdev) { + dev-hard_header_len = tdev-hard_header_len + sizeof(struct iphdr); + dev-mtu = tdev-mtu - sizeof(struct iphdr); + } + dev-iflink = tunnel-parms.link; +} + static int ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -723,6 +757,11 @@ 
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t-parms.iph.ttl = p.iph.ttl; t-parms.iph.tos = p.iph.tos; t-parms.iph.frag_off = p.iph.frag_off; + if (t-parms.link != p.link) { + t-parms.link = p.link; + ipip_tunnel_bind_dev(dev); + netdev_state_change(dev); + } } if (copy_to_user(ifr-ifr_ifru.ifru_data, t-parms, sizeof(p))) err = -EFAULT; @@ -791,12 +830,9 @@ static void ipip_tunnel_setup(struct net_device *dev) static int ipip_tunnel_init(struct net_device *dev) { - struct net_device *tdev = NULL; struct ip_tunnel *tunnel; - struct iphdr *iph; tunnel = netdev_priv(dev); - iph = tunnel-parms.iph; tunnel-dev = dev; strcpy(tunnel-parms.name, dev-name); @@ -804,29 +840,7 @@ static int ipip_tunnel_init(struct net_device *dev) memcpy(dev-dev_addr, tunnel-parms.iph.saddr, 4); memcpy(dev-broadcast, tunnel-parms.iph.daddr, 4); - if (iph-daddr) { - struct flowi fl = { .oif = tunnel-parms.link, - .nl_u = { .ip4_u = - { .daddr = iph-daddr, - .saddr = iph-saddr, - .tos = RT_TOS(iph-tos) } }, - .proto = IPPROTO_IPIP }; - struct rtable *rt; - if (!ip_route_output_key(rt, fl)) { - tdev = rt-u.dst.dev; - ip_rt_put(rt); - } - dev-flags |= IFF_POINTOPOINT; - } - - if (!tdev tunnel-parms.link) - tdev = __dev_get_by_index(init_net, tunnel-parms.link); - - if (tdev) { - dev-hard_header_len = tdev-hard_header_len + sizeof(struct iphdr); - dev-mtu = tdev-mtu - sizeof(struct iphdr); - } - dev-iflink = tunnel-parms.link; + ipip_tunnel_bind_dev(dev); return 0; } -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] [NETDEV] sky2: rtnl_lock out of loop will be faster
On Wed, 12 Dec 2007 16:50:09 +0800 Wang Chen [EMAIL PROTECTED] wrote: [PATCH 4/4] [NETDEV] sky2: rtnl_lock out of loop will be faster Before this patch, it gets and releases the lock at each iteration of the loop. Changing unregister_netdev to unregister_netdevice and locking outside of the loop will be faster for this approach. Signed-off-by: Wang Chen [EMAIL PROTECTED] --- sky2.c |4 +++- 1 files changed, 3 insertions(+), 1 deletion(-) --- linux-2.6.24.rc5.org/drivers/net/sky2.c 2007-12-12 10:19:43.0 +0800 +++ linux-2.6.24.rc5/drivers/net/sky2.c 2007-12-12 15:23:37.0 +0800 @@ -4270,8 +4270,10 @@ static void __devexit sky2_remove(struct del_timer_sync(&hw->watchdog_timer); cancel_work_sync(&hw->restart_work); + rtnl_lock(); for (i = hw->ports-1; i >= 0; --i) - unregister_netdev(hw->dev[i]); + unregister_netdevice(hw->dev[i]); + rtnl_unlock(); sky2_write32(hw, B0_IMSK, 0); Umm, okay but it doesn't matter really, there can only be 2 ports and 90+% of the boards only have one port. So why bother?? -- Stephen Hemminger [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [kvm-devel] [virtio-net][PATCH] Don't arm tx hrtimer with a constant 500us each transmit
Am Mittwoch, 12. Dezember 2007 schrieb Dor Laor: Christian Borntraeger wrote: Am Mittwoch, 12. Dezember 2007 schrieb Dor Laor: --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -406,10 +405,10 @@ again: Hmm, while I agree in general with the patch, I fail to find the proper version of virtio_net where this patch applies. I tried kvm.git and linux-2.6.git from kernel.org. Can you give me a pointer to the repository where you work on virtio? Sorry for that, I added some debug prints of my own. Here it is: git clone git://kvm.qumranet.com/home/dor/src/linux-2.6-nv use branch 'virtio'. Ah, ok. I will look into that branch. BTW: what git repository do you use? I use Avi's git from kernel.org: git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm Christian -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] IPIP: Allow rebinding the tunnel to another interface
Michal Schmidt wrote: On Wed, 12 Dec 2007 17:00:14 +0100 Patrick McHardy [EMAIL PROTECTED] wrote: If you change dev->iflink this should trigger a rtnetlink notification. OK, I've added netdev_state_change(dev). Here's the new patch. Looks good to me, thanks. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/7] [TFRC]: Loss interval code needs the macros/inlines that were moved
From: Gerrit Renker [EMAIL PROTECTED] This moves the inlines (which were previously declared as macros) back into packet_history.h since the loss detection code needs to be able to read entries from the RX history in order to create the relevant loss entries: it needs at least tfrc_rx_hist_loss_prev() and tfrc_rx_hist_last_rcv(), which in turn require the definition of the other inlines (macros). Signed-off-by: Gerrit Renker [EMAIL PROTECTED] Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED] --- net/dccp/ccids/lib/packet_history.c | 35 --- net/dccp/ccids/lib/packet_history.h | 35 +++ 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 727b17d..dd2cf2d 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -151,23 +151,6 @@ void tfrc_rx_packet_history_exit(void) } } -/** - * tfrc_rx_hist_index - index to reach n-th entry after loss_start - */ -static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n) -{ - return (h-loss_start + n) TFRC_NDUPACK; -} - -/** - * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far - */ -static inline struct tfrc_rx_hist_entry * - tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h) -{ - return h-ring[tfrc_rx_hist_index(h, h-loss_count)]; -} - void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, const u32 ndp) @@ -183,24 +166,6 @@ void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, } EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet); -/** - * tfrc_rx_hist_entry - return the n-th history entry after loss_start - */ -static inline struct tfrc_rx_hist_entry * - tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n) -{ - return h-ring[tfrc_rx_hist_index(h, n)]; -} - -/** - * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected - */ -static inline struct tfrc_rx_hist_entry * - tfrc_rx_hist_loss_prev(const struct 
tfrc_rx_hist *h) -{ - return h-ring[h-loss_start]; -} - /* has the packet contained in skb been seen before? */ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) { diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 3dfd182..e58b0fc 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -84,6 +84,41 @@ struct tfrc_rx_hist { #define rtt_sample_prev loss_start }; +/** + * tfrc_rx_hist_index - index to reach n-th entry after loss_start + */ +static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n) +{ + return (h-loss_start + n) TFRC_NDUPACK; +} + +/** + * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far + */ +static inline struct tfrc_rx_hist_entry * + tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h) +{ + return h-ring[tfrc_rx_hist_index(h, h-loss_count)]; +} + +/** + * tfrc_rx_hist_entry - return the n-th history entry after loss_start + */ +static inline struct tfrc_rx_hist_entry * + tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n) +{ + return h-ring[tfrc_rx_hist_index(h, n)]; +} + +/** + * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected + */ +static inline struct tfrc_rx_hist_entry * + tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h) +{ + return h-ring[h-loss_start]; +} + extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, const u32 ndp); -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC] ehea: kdump support using new shutdown hook
This patch adds kdump support using the new PPC crash shutdown hook to the ehea driver. The driver now keeps a list of firmware handles which have to be freed in case of a crash. The crash handler does the minimum required: it frees the firmware resource handles plus broadcast/multicast registrations. Please comment. Shutdown hook patches: http://ozlabs.org/pipermail/linuxppc-dev/2007-December/048058.html http://ozlabs.org/pipermail/linuxppc-dev/2007-December/048059.html Signed-off-by: Thomas Klein [EMAIL PROTECTED] --- diff -Nurp -X dontdiff linux-2.6.24-rc5/drivers/net/ehea/ehea.h patched_kernel/drivers/net/ehea/ehea.h --- linux-2.6.24-rc5/drivers/net/ehea/ehea.h2007-12-11 04:48:43.0 +0100 +++ patched_kernel/drivers/net/ehea/ehea.h 2007-12-12 17:30:53.0 +0100 @@ -40,7 +40,7 @@ #include asm/io.h #define DRV_NAME ehea -#define DRV_VERSIONEHEA_0083 +#define DRV_VERSIONEHEA_0084 /* eHEA capability flags */ #define DLPAR_PORT_ADD_REM 1 @@ -386,6 +386,7 @@ struct ehea_port_res { #define EHEA_MAX_PORTS 16 +#define EHEA_MAX_RES_HANDLES (100 * EHEA_MAX_PORTS + 10) struct ehea_adapter { u64 handle; struct of_device *ofdev; @@ -397,6 +398,7 @@ struct ehea_adapter { u64 max_mc_mac;/* max number of multicast mac addresses */ int active_ports; struct list_head list; + u64 res_handles[EHEA_MAX_RES_HANDLES]; }; diff -Nurp -X dontdiff linux-2.6.24-rc5/drivers/net/ehea/ehea_main.c patched_kernel/drivers/net/ehea/ehea_main.c --- linux-2.6.24-rc5/drivers/net/ehea/ehea_main.c 2007-12-11 04:48:43.0 +0100 +++ patched_kernel/drivers/net/ehea/ehea_main.c 2007-12-12 17:30:53.0 +0100 @@ -35,6 +35,7 @@ #include linux/if_ether.h #include linux/notifier.h #include linux/reboot.h +#include asm-powerpc/kexec.h #include net/ip.h @@ -2256,6 +2257,33 @@ static int ehea_clean_all_portres(struct return ret; } +static void ehea_update_adapter_handles(struct ehea_adapter *adapter) +{ + int i, k; + int j = 0; + + memset(adapter-res_handles, sizeof(adapter-res_handles), 0); + + for (k = 0; k 
EHEA_MAX_PORTS; k++) { + struct ehea_port *port = adapter-port[k]; + + if (!port || (port-state != EHEA_PORT_UP)) + continue; + + for(i = 0; i port-num_def_qps + port-num_add_tx_qps; i++) { + struct ehea_port_res *pr = port-port_res[i]; + + adapter-res_handles[j++] = pr-qp-fw_handle; + adapter-res_handles[j++] = pr-send_cq-fw_handle; + adapter-res_handles[j++] = pr-recv_cq-fw_handle; + adapter-res_handles[j++] = pr-eq-fw_handle; + adapter-res_handles[j++] = pr-send_mr.handle; + adapter-res_handles[j++] = pr-recv_mr.handle; + } + adapter-res_handles[j++] = port-qp_eq-fw_handle; + } +} + static void ehea_remove_adapter_mr(struct ehea_adapter *adapter) { if (adapter-active_ports) @@ -2318,6 +2346,7 @@ static int ehea_up(struct net_device *de ret = 0; port-state = EHEA_PORT_UP; + ehea_update_adapter_handles(port-adapter); goto out; out_free_irqs: @@ -2387,6 +2416,8 @@ static int ehea_down(struct net_device * ehea_info(Failed freeing resources for %s. ret=%i, dev-name, ret); + ehea_update_adapter_handles(port-adapter); + return ret; } @@ -3302,6 +,71 @@ static int __devexit ehea_remove(struct return 0; } +void ehea_crash_deregister(void) +{ + struct ehea_adapter *adapter; + int i; + u64 hret; + u8 reg_type; + + list_for_each_entry(adapter, adapter_list, list) { + for (i = 0; i EHEA_MAX_PORTS; i++) { + struct ehea_port *port = adapter-port[i]; + if (port-state == EHEA_PORT_UP) { + struct ehea_mc_list *mc_entry = port-mc_list; + struct list_head *pos; + struct list_head *temp; + + /* Undo multicast registrations */ + list_for_each_safe(pos, temp, + (port-mc_list-list)) { + mc_entry = list_entry(pos, + struct ehea_mc_list, + list); + ehea_multicast_reg_helper(port, + mc_entry-macaddr, + H_DEREG_BCMC); + } + + /* Undo broad registration */ +
Re: [PATCH 8/8] [PATCH v2] [CCID3]: Interface CCID3 code with newer Loss Intervals Database
| This time around I'm not doing any reordering, just trying to use your | patches as is, but adding this patch as-is produces a kernel that will | crash, no? | | The loss history and the RX/TX packet history slabs are all created in | tfrc.c using the three different __init routines of the dccp_tfrc_lib. | | Yes, the init routines are called and in turn they create the slab | caches, but up to the patch [PATCH 8/8] [PATCH v2] [CCID3]: Interface | CCID3 code with newer Loss Intervals Database the new li slab is not | being created, no? See what I'm talking? | Sorry, there is some weird kind of mix-up going on. Can you please check your patch set: it seems this email exchange refers to an older variant. In the most recent patch set, the slab is introduced in the patch [TFRC]: Ringbuffer to track loss interval history --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -27,6 +23,54 @@ struct dccp_li_hist_entry { u32 dccplih_interval; }; +static struct kmem_cache *tfrc_lh_slab __read_mostly;/* === */ +/* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */ +static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 }; // ... And this is 6/8, i.e. before 8/8, cf. http://www.mail-archive.com/[EMAIL PROTECTED]/msg03000.html I don't know which tree you are working off, would it be possible to check against the test tree git://eden-feed.erg.abdn.ac.uk/dccp_exp [dccp] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHES 0/7]: DCCP patches for 2.6.25
From: Arnaldo Carvalho de Melo [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 14:36:46 -0200 Please consider pulling from: master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6.25 Pulled and pushed out to net-2.6.25, thanks! -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] iproute2: support dotted-quad netmask notation.
On Wed, 12 Dec 2007 12:55:13 +0100 Andreas Henriksson [EMAIL PROTECTED] wrote: On Tue, Dec 11, 2007 at 05:14:06PM -0800, Stephen Hemminger wrote: On Sun, 09 Dec 2007 18:10:22 +0100 Andreas Henriksson [EMAIL PROTECTED] wrote: I think both previous patches were broken on big-endian platforms. Here's an updated patch again. I'm very sorry for the inconvenience! [...] + *val=0; + for (mask = ntohl(addr.data[0]); mask; mask <<= 1) + (*val)++; [...] applied Just to make sure It looks on git.kernel.org like you applied the wrong patch. (Maybe you just haven't pushed out the latest changes there yet.) Please double-check that you actually applied the latest version (which is the one in the mail you replied "applied" to, important part quoted above). Actually, I took your logic and moved it to a new function: static unsigned cidr(const inet_prefix *addr) { unsigned bits = 0; u_int32_t mask; for (mask = ntohl(addr->data[0]); mask; mask <<= 1) ++bits; return bits; } -- Stephen Hemminger [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/7] [TFRC]: Ringbuffer to track loss interval history
From: Gerrit Renker [EMAIL PROTECTED] A ringbuffer-based implementation of loss interval history is easier to maintain, allocate, and update. The `swap' routine to keep the RX history sorted is due to and was written by Arnaldo Carvalho de Melo, simplifying an earlier macro-based variant. Details: * access to the Loss Interval Records via macro wrappers (with safety checks); * simplified, on-demand allocation of entries (no extra memory consumption on lossless links); cache allocation is local to the module / exported as service; * provision of RFC-compliant algorithm to re-compute average loss interval; * provision of comprehensive, new loss detection algorithm - support for all cases of loss, including re-ordered/duplicate packets; - waiting for NDUPACK=3 packets to fill the hole; - updating loss records when a late-arriving packet fills a hole. Signed-off-by: Gerrit Renker [EMAIL PROTECTED] Signed-off-by: Ian McDonald [EMAIL PROTECTED] Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED] --- net/dccp/ccids/lib/loss_interval.c | 161 +- net/dccp/ccids/lib/loss_interval.h | 56 +- net/dccp/ccids/lib/packet_history.c | 218 ++- net/dccp/ccids/lib/packet_history.h | 11 +- net/dccp/ccids/lib/tfrc.h |3 + 5 files changed, 435 insertions(+), 14 deletions(-) diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index c0a933a..39980d1 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -1,6 +1,7 @@ /* * net/dccp/ccids/lib/loss_interval.c * + * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. * Copyright (c) 2005-7 Ian McDonald [EMAIL PROTECTED] * Copyright (c) 2005 Arnaldo Carvalho de Melo [EMAIL PROTECTED] @@ -10,12 +11,7 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
*/ - -#include linux/module.h #include net/sock.h -#include ../../dccp.h -#include loss_interval.h -#include packet_history.h #include tfrc.h #define DCCP_LI_HIST_IVAL_F_LENGTH 8 @@ -27,6 +23,54 @@ struct dccp_li_hist_entry { u32 dccplih_interval; }; +static struct kmem_cache *tfrc_lh_slab __read_mostly; +/* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */ +static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 }; + +/* implements LIFO semantics on the array */ +static inline u8 LIH_INDEX(const u8 ctr) +{ + return (LIH_SIZE - 1 - (ctr % LIH_SIZE)); +} + +/* the `counter' index always points at the next entry to be populated */ +static inline struct tfrc_loss_interval *tfrc_lh_peek(struct tfrc_loss_hist *lh) +{ + return lh-counter ? lh-ring[LIH_INDEX(lh-counter - 1)] : NULL; +} + +/* given i with 0 = i = k, return I_i as per the rfc3448bis notation */ +static inline u32 tfrc_lh_get_interval(struct tfrc_loss_hist *lh, const u8 i) +{ + BUG_ON(i = lh-counter); + return lh-ring[LIH_INDEX(lh-counter - i - 1)]-li_length; +} + +/* + * On-demand allocation and de-allocation of entries + */ +static struct tfrc_loss_interval *tfrc_lh_demand_next(struct tfrc_loss_hist *lh) +{ + if (lh-ring[LIH_INDEX(lh-counter)] == NULL) + lh-ring[LIH_INDEX(lh-counter)] = kmem_cache_alloc(tfrc_lh_slab, + GFP_ATOMIC); + return lh-ring[LIH_INDEX(lh-counter)]; +} + +void tfrc_lh_cleanup(struct tfrc_loss_hist *lh) +{ + if (!tfrc_lh_is_initialised(lh)) + return; + + for (lh-counter = 0; lh-counter LIH_SIZE; lh-counter++) + if (lh-ring[LIH_INDEX(lh-counter)] != NULL) { + kmem_cache_free(tfrc_lh_slab, + lh-ring[LIH_INDEX(lh-counter)]); + lh-ring[LIH_INDEX(lh-counter)] = NULL; + } +} +EXPORT_SYMBOL_GPL(tfrc_lh_cleanup); + static struct kmem_cache *dccp_li_cachep __read_mostly; static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio) @@ -98,6 +142,65 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) 
EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); +static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) +{ + u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0; + int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */ + + for (i=0; i = k; i++) { + i_i = tfrc_lh_get_interval(lh, i); + + if (i k) { + i_tot0 += i_i * tfrc_lh_weights[i]; + w_tot += tfrc_lh_weights[i]; + } + if (i 0) + i_tot1 += i_i * tfrc_lh_weights[i-1]; + } + + BUG_ON(w_tot == 0); + lh-i_mean = max(i_tot0, i_tot1) / w_tot; +} + +/** + *
[PATCH 6/7] [CCID3]: Interface CCID3 code with newer Loss Intervals Database
From: Gerrit Renker [EMAIL PROTECTED] This hooks up the TFRC Loss Interval database with CCID 3 packet reception. In addition, it makes the CCID-specific computation of the first loss interval (which requires access to all the guts of CCID3) local to ccid3.c. The patch also fixes an omission in the DCCP code, that of a default / fallback RTT value (defined in section 3.4 of RFC 4340 as 0.2 sec); while at it, the upper bound of 4 seconds for an RTT sample has been reduced to match the initial TCP RTO value of 3 seconds from[RFC 1122, 4.2.3.1]. Signed-off-by: Gerrit Renker [EMAIL PROTECTED] Signed-off-by: Ian McDonald [EMAIL PROTECTED] Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED] --- net/dccp/ccids/ccid3.c | 72 ++-- net/dccp/ccids/ccid3.h | 10 ++-- net/dccp/ccids/lib/loss_interval.c | 18 net/dccp/ccids/lib/tfrc.c | 10 ++-- net/dccp/dccp.h|7 ++- 5 files changed, 84 insertions(+), 33 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index b92069b..a818a1e 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1,6 +1,7 @@ /* * net/dccp/ccids/ccid3.c * + * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. * Copyright (c) 2005-7 Ian McDonald [EMAIL PROTECTED] * @@ -33,11 +34,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include ../ccid.h #include ../dccp.h -#include lib/packet_history.h -#include lib/loss_interval.h -#include lib/tfrc.h #include ccid3.h #include asm/unaligned.h @@ -757,6 +754,46 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) return 0; } +/** ccid3_first_li - Implements [RFC 3448, 6.3.1] + * + * Determine the length of the first loss interval via inverse lookup. 
+ * Assume that X_recv can be computed by the throughput equation + * s + * X_recv = + * R * fval + * Find some p such that f(p) = fval; return 1/p (scaled). + */ +static u32 ccid3_first_li(struct sock *sk) +{ + struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + u32 x_recv, p, delta; + u64 fval; + + if (hcrx-ccid3hcrx_rtt == 0) { + DCCP_WARN(No RTT estimate available, using fallback RTT\n); + hcrx-ccid3hcrx_rtt = DCCP_FALLBACK_RTT; + } + + delta = ktime_to_us(net_timedelta(hcrx-ccid3hcrx_tstamp_last_feedback)); + x_recv = scaled_div32(hcrx-ccid3hcrx_bytes_recv, delta); + if (x_recv == 0) { /* would also trigger divide-by-zero */ + DCCP_WARN(X_recv==0\n); + if ((x_recv = hcrx-ccid3hcrx_x_recv) == 0) { + DCCP_BUG(stored value of X_recv is zero); + return ~0U; + } + } + + fval = scaled_div(hcrx-ccid3hcrx_s, hcrx-ccid3hcrx_rtt); + fval = scaled_div32(fval, x_recv); + p = tfrc_calc_x_reverse_lookup(fval); + + ccid3_pr_debug(%s(%p), receive rate=%u bytes/s, implied + loss rate=%u\n, dccp_role(sk), sk, x_recv, p); + + return p == 0 ? 
~0U : scaled_div(1, p); +} + static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); @@ -794,6 +831,14 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) /* * Handle pending losses and otherwise check for new loss */ + if (tfrc_rx_hist_loss_pending(hcrx-ccid3hcrx_hist) + tfrc_rx_handle_loss(hcrx-ccid3hcrx_hist, + hcrx-ccid3hcrx_li_hist, + skb, ndp, ccid3_first_li, sk) ) { + do_feedback = CCID3_FBACK_PARAM_CHANGE; + goto done_receiving; + } + if (tfrc_rx_hist_new_loss_indicated(hcrx-ccid3hcrx_hist, skb, ndp)) goto update_records; @@ -803,7 +848,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (unlikely(!is_data_packet)) goto update_records; - if (list_empty(hcrx-ccid3hcrx_li_hist)) { /* no loss so far: p = 0 */ + if (!tfrc_lh_is_initialised(hcrx-ccid3hcrx_li_hist)) { const u32 sample = tfrc_rx_hist_sample_rtt(hcrx-ccid3hcrx_hist, skb); /* * Empty loss history: no loss so far, hence p stays 0. @@ -812,6 +857,13 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) */ if (sample != 0) hcrx-ccid3hcrx_rtt = tfrc_ewma(hcrx-ccid3hcrx_rtt, sample, 9); + + } else if (tfrc_lh_update_i_mean(hcrx-ccid3hcrx_li_hist, skb)) { + /* +* Step (3) of
[PATCH 4/7] [CCID3]: Redundant debugging output / documentation
From: Gerrit Renker [EMAIL PROTECTED] Each time feedback is sent two lines are printed: ccid3_hc_rx_send_feedback: client ... - entry ccid3_hc_rx_send_feedback: Interval ...usec, X_recv=..., 1/p=... The first line is redundant and thus removed. Further, documentation of ccid3_hc_rx_sock (capitalisation) is made consistent. Signed-off-by: Gerrit Renker [EMAIL PROTECTED] Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED] --- net/dccp/ccids/ccid3.c |2 -- net/dccp/ccids/ccid3.h |4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 60fcb31..b92069b 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -685,8 +685,6 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, ktime_t now; s64 delta = 0; - ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); - if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM)) return; diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 3c33dc6..6ceeb80 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -135,9 +135,9 @@ enum ccid3_hc_rx_states { * * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3) * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard) - * @ccid3hcrx_p - current loss event rate (RFC 3448 5.4) + * @ccid3hcrx_p - Current loss event rate (RFC 3448 5.4) * @ccid3hcrx_last_counter - Tracks window counter (RFC 4342, 8.1) - * @ccid3hcrx_state - receiver state, one of %ccid3_hc_rx_states + * @ccid3hcrx_state - Receiver state, one of %ccid3_hc_rx_states * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/7] [TFRC]: Put RX/TX initialisation into tfrc.c
From: Gerrit Renker [EMAIL PROTECTED] This separates RX/TX initialisation and puts all packet history / loss intervals initialisation into tfrc.c. The organisation is uniform: slab declaration - {rx,tx}_init() - {rx,tx}_exit() Signed-off-by: Gerrit Renker [EMAIL PROTECTED] Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED] --- net/dccp/ccids/lib/packet_history.c | 68 -- net/dccp/ccids/lib/tfrc.c | 31 2 files changed, 55 insertions(+), 44 deletions(-) diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index af44082..727b17d 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -57,6 +57,22 @@ struct tfrc_tx_hist_entry { */ static struct kmem_cache *tfrc_tx_hist_slab; +int __init tfrc_tx_packet_history_init(void) +{ + tfrc_tx_hist_slab = kmem_cache_create(tfrc_tx_hist, + sizeof(struct tfrc_tx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, NULL); + return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0; +} + +void tfrc_tx_packet_history_exit(void) +{ + if (tfrc_tx_hist_slab != NULL) { + kmem_cache_destroy(tfrc_tx_hist_slab); + tfrc_tx_hist_slab = NULL; + } +} + static struct tfrc_tx_hist_entry * tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) { @@ -119,6 +135,22 @@ EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt); */ static struct kmem_cache *tfrc_rx_hist_slab; +int __init tfrc_rx_packet_history_init(void) +{ + tfrc_rx_hist_slab = kmem_cache_create(tfrc_rxh_cache, + sizeof(struct tfrc_rx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, NULL); + return tfrc_rx_hist_slab == NULL ? 
-ENOBUFS : 0; +} + +void tfrc_rx_packet_history_exit(void) +{ + if (tfrc_rx_hist_slab != NULL) { + kmem_cache_destroy(tfrc_rx_hist_slab); + tfrc_rx_hist_slab = NULL; + } +} + /** * tfrc_rx_hist_index - index to reach n-th entry after loss_start */ @@ -316,39 +348,3 @@ keep_ref_for_next_time: return sample; } EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt); - -__init int packet_history_init(void) -{ - tfrc_tx_hist_slab = kmem_cache_create(tfrc_tx_hist, - sizeof(struct tfrc_tx_hist_entry), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (tfrc_tx_hist_slab == NULL) - goto out_err; - - tfrc_rx_hist_slab = kmem_cache_create(tfrc_rx_hist, - sizeof(struct tfrc_rx_hist_entry), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (tfrc_rx_hist_slab == NULL) - goto out_free_tx; - - return 0; - -out_free_tx: - kmem_cache_destroy(tfrc_tx_hist_slab); - tfrc_tx_hist_slab = NULL; -out_err: - return -ENOBUFS; -} - -void packet_history_exit(void) -{ - if (tfrc_tx_hist_slab != NULL) { - kmem_cache_destroy(tfrc_tx_hist_slab); - tfrc_tx_hist_slab = NULL; - } - - if (tfrc_rx_hist_slab != NULL) { - kmem_cache_destroy(tfrc_rx_hist_slab); - tfrc_rx_hist_slab = NULL; - } -} diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 3a7a183..20763fa 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -14,27 +14,42 @@ module_param(tfrc_debug, bool, 0444); MODULE_PARM_DESC(tfrc_debug, Enable debug messages); #endif +extern int tfrc_tx_packet_history_init(void); +extern void tfrc_tx_packet_history_exit(void); +extern int tfrc_rx_packet_history_init(void); +extern void tfrc_rx_packet_history_exit(void); + extern int dccp_li_init(void); extern void dccp_li_exit(void); -extern int packet_history_init(void); -extern void packet_history_exit(void); static int __init tfrc_module_init(void) { int rc = dccp_li_init(); - if (rc == 0) { - rc = packet_history_init(); - if (rc != 0) - dccp_li_exit(); - } + if (rc) + goto out; + + rc = tfrc_tx_packet_history_init(); + if (rc) + goto 
out_free_loss_intervals; + rc = tfrc_rx_packet_history_init(); + if (rc) + goto out_free_tx_history; + return 0; + +out_free_tx_history: + tfrc_tx_packet_history_exit(); +out_free_loss_intervals: + dccp_li_exit(); +out: return rc; } static void __exit tfrc_module_exit(void) { - packet_history_exit(); + tfrc_rx_packet_history_exit(); + tfrc_tx_packet_history_exit(); dccp_li_exit(); } -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED]
[PATCH 5/7] [TFRC]: CCID3 (and CCID4) needs to access these inlines
From: Gerrit Renker [EMAIL PROTECTED] This moves two inlines back to packet_history.h: these are not private to packet_history.c, but are needed by CCID3/4 to detect whether a new loss is indicated, or whether a loss is already pending. Signed-off-by: Gerrit Renker [EMAIL PROTECTED] Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED] --- net/dccp/ccids/lib/packet_history.c | 26 -- net/dccp/ccids/lib/packet_history.h | 35 +++ 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 5b10a1e..20af1a6 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -191,32 +191,6 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate); -/* initialise loss detection and disable RTT sampling */ -static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h) -{ - h-loss_count = 1; -} - -/* indicate whether previously a packet was detected missing */ -static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) -{ - return h-loss_count; -} - -/* any data packets missing between last reception and skb ? 
*/ -int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h, - const struct sk_buff *skb, u32 ndp) -{ - int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)-tfrchrx_seqno, -DCCP_SKB_CB(skb)-dccpd_seq); - - if (delta 1 ndp delta) - tfrc_rx_hist_loss_indicated(h); - - return tfrc_rx_hist_loss_pending(h); -} -EXPORT_SYMBOL_GPL(tfrc_rx_hist_new_loss_indicated); - static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) { const u8 idx_a = tfrc_rx_hist_index(h, a), diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 24edd8d..c7eeda4 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -118,16 +118,43 @@ static inline struct tfrc_rx_hist_entry * return h-ring[h-loss_start]; } +/* initialise loss detection and disable RTT sampling */ +static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h) +{ + h-loss_count = 1; +} + +/* indicate whether previously a packet was detected missing */ +static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) +{ + return h-loss_count; +} + +/* any data packets missing between last reception and skb ? 
*/ +static inline int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h, + const struct sk_buff *skb, + u32 ndp) +{ + int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)-tfrchrx_seqno, +DCCP_SKB_CB(skb)-dccpd_seq); + + if (delta 1 ndp delta) + tfrc_rx_hist_loss_indicated(h); + + return tfrc_rx_hist_loss_pending(h); +} + extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, const u32 ndp); extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); -extern int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h, - const struct sk_buff *skb, u32 ndp); + struct tfrc_loss_hist; -extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *, struct tfrc_loss_hist *, +extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, + struct tfrc_loss_hist *lh, struct sk_buff *skb, u32 ndp, - u32 (*first_li)(struct sock *), struct sock *); + u32 (*first_li)(struct sock *sk), + struct sock *sk); extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb); extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); -- 1.5.3.4 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHES 0/7]: DCCP patches for 2.6.25
Hi David, Please consider pulling from: master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6.25 Best Regards, - Arnaldo b/net/dccp/ccids/ccid3.c |2 b/net/dccp/ccids/ccid3.h |5 b/net/dccp/ccids/lib/loss_interval.c | 161 ++- b/net/dccp/ccids/lib/loss_interval.h | 56 ++ b/net/dccp/ccids/lib/packet_history.c | 68 +++- b/net/dccp/ccids/lib/packet_history.h | 36 b/net/dccp/ccids/lib/tfrc.c | 32 ++- b/net/dccp/ccids/lib/tfrc.h |4 b/net/dccp/dccp.h |8 net/dccp/ccids/ccid3.c| 72 +++- net/dccp/ccids/ccid3.h| 10 - net/dccp/ccids/lib/loss_interval.c| 284 +- net/dccp/ccids/lib/loss_interval.h| 11 - net/dccp/ccids/lib/packet_history.c | 279 + net/dccp/ccids/lib/packet_history.h | 47 - net/dccp/ccids/lib/tfrc.c | 10 - 16 files changed, 643 insertions(+), 442 deletions(-) -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/7] [TFRC]: Remove previous loss intervals implementation
From: Gerrit Renker [EMAIL PROTECTED] Signed-off-by: Gerrit Renker [EMAIL PROTECTED] Signed-off-by: Ian McDonald [EMAIL PROTECTED] Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED] --- net/dccp/ccids/lib/loss_interval.c | 266 net/dccp/ccids/lib/loss_interval.h | 10 +-- 2 files changed, 1 insertions(+), 275 deletions(-) diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 8b962c1..849e181 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -14,15 +14,6 @@ #include net/sock.h #include tfrc.h -#define DCCP_LI_HIST_IVAL_F_LENGTH 8 - -struct dccp_li_hist_entry { - struct list_head dccplih_node; - u64 dccplih_seqno:48, -dccplih_win_count:4; - u32 dccplih_interval; -}; - static struct kmem_cache *tfrc_lh_slab __read_mostly; /* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */ static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 }; @@ -71,77 +62,6 @@ void tfrc_lh_cleanup(struct tfrc_loss_hist *lh) } EXPORT_SYMBOL_GPL(tfrc_lh_cleanup); -static struct kmem_cache *dccp_li_cachep __read_mostly; - -static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio) -{ - return kmem_cache_alloc(dccp_li_cachep, prio); -} - -static inline void dccp_li_hist_entry_delete(struct dccp_li_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(dccp_li_cachep, entry); -} - -void dccp_li_hist_purge(struct list_head *list) -{ - struct dccp_li_hist_entry *entry, *next; - - list_for_each_entry_safe(entry, next, list, dccplih_node) { - list_del_init(entry-dccplih_node); - kmem_cache_free(dccp_li_cachep, entry); - } -} - -EXPORT_SYMBOL_GPL(dccp_li_hist_purge); - -/* Weights used to calculate loss event rate */ -/* - * These are integers as per section 8 of RFC3448. We can then divide by 4 * - * when we use it. 
- */ -static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = { - 4, 4, 4, 4, 3, 2, 1, 1, -}; - -u32 dccp_li_hist_calc_i_mean(struct list_head *list) -{ - struct dccp_li_hist_entry *li_entry, *li_next; - int i = 0; - u32 i_tot; - u32 i_tot0 = 0; - u32 i_tot1 = 0; - u32 w_tot = 0; - - list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { - if (li_entry-dccplih_interval != ~0U) { - i_tot0 += li_entry-dccplih_interval * dccp_li_hist_w[i]; - w_tot += dccp_li_hist_w[i]; - if (i != 0) - i_tot1 += li_entry-dccplih_interval * dccp_li_hist_w[i - 1]; - } - - - if (++i DCCP_LI_HIST_IVAL_F_LENGTH) - break; - } - - if (i != DCCP_LI_HIST_IVAL_F_LENGTH) - return 0; - - i_tot = max(i_tot0, i_tot1); - - if (!w_tot) { - DCCP_WARN(w_tot = 0\n); - return 1; - } - - return i_tot / w_tot; -} - -EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); - static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) { u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0; @@ -201,192 +121,6 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean); -static int dccp_li_hist_interval_new(struct list_head *list, -const u64 seq_loss, const u8 win_loss) -{ - struct dccp_li_hist_entry *entry; - int i; - - for (i = 0; i DCCP_LI_HIST_IVAL_F_LENGTH; i++) { - entry = dccp_li_hist_entry_new(GFP_ATOMIC); - if (entry == NULL) { - dccp_li_hist_purge(list); - DCCP_BUG(loss interval list entry is NULL); - return 0; - } - entry-dccplih_interval = ~0; - list_add(entry-dccplih_node, list); - } - - entry-dccplih_seqno = seq_loss; - entry-dccplih_win_count = win_loss; - return 1; -} - -/* calculate first loss interval - * - * returns estimated loss interval in usecs */ -static u32 dccp_li_calc_first_li(struct sock *sk, -struct list_head *hist_list, -ktime_t last_feedback, -u16 s, u32 bytes_recv, -u32 previous_x_recv) -{ -/* - * FIXME: - * Will be rewritten in the upcoming new loss intervals code. 
- * Has to be commented ou because it relies on the old rx history - * data structures - */ -#if 0 - struct tfrc_rx_hist_entry *entry, *next, *tail = NULL; - u32 x_recv, p; - suseconds_t rtt, delta; - ktime_t tstamp = ktime_set(0, 0); - int interval = 0; - int win_count = 0; - int step = 0; - u64 fval; - -
Re: [PATCH 6/7] [NETDEV]: tehuti Fix possible causing oops of net_rx_action
On Wed, 12 Dec 2007 07:20:34 -0800 (PST) David Miller [EMAIL PROTECTED] wrote: From: Stephen Hemminger [EMAIL PROTECTED] Date: Tue, 11 Dec 2007 21:39:39 -0800 On Wed, 12 Dec 2007 13:01:27 +0900 Joonwoo Park [EMAIL PROTECTED] wrote: [NETDEV]: tehuti Fix possible causing oops of net_rx_action Signed-off-by: Joonwoo Park [EMAIL PROTECTED] --- drivers/net/tehuti.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index 21230c9..955e749 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -305,6 +305,8 @@ static int bdx_poll(struct napi_struct *napi, int budget) netif_rx_complete(dev, napi); bdx_enable_interrupts(priv); + if (unlikely(work_done == napi-weight)) + return work_done - 1; } return work_done; } A better fix would be not going over budget in the first place. That's not the problem. They are not going over the budget, rather, they are hitting the budget yet doing netif_rx_complete() as well which is illegal. Unless you strictly process less than weight packets, you must not netif_rx_complete() and re-enable chip interrupts. I can't believe people are trying to fix this bug like this. Sorry, I was looking at a different possible problem. The issue is that if netdev_budget was set smaller (say 128) but device weight was set larger (say 256). The new code would still allow the device to do a full swipe (256) packets rather than only 128 as in earlier NAPI. I guess it is an okay behaviour change, because we don't really guarantee that case. The problem with the tehuti driver is the logic around priv-napi_stop. That whole early stop concept should be removed since it just duplicates the logic of netdev-weight but breaks the assumptions in the calling netif_rx_action. -- Stephen Hemminger [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH/RFC] [POWERPC] Add fixed-phy support for fs_enet
This patch adds support to use the fixed-link property of an ethernet node to fs_enet for the CONFIG_PPC_CPM_NEW_BINDING case. Signed-off-by: Jochen Friedrich [EMAIL PROTECTED] --- drivers/net/fs_enet/fs_enet-main.c |9 - 1 files changed, 8 insertions(+), 1 deletions(-) diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index f2a4d39..8220c70 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -1174,8 +1174,15 @@ static int __devinit find_phy(struct device_node *np, struct device_node *phynode, *mdionode; struct resource res; int ret = 0, len; + const u32 *data; + + data = of_get_property(np, fixed-link, NULL); + if (data) { + snprintf(fpi-bus_id, 16, PHY_ID_FMT, 0, *data); + return 0; + } - const u32 *data = of_get_property(np, phy-handle, len); + data = of_get_property(np, phy-handle, len); if (!data || len != 4) return -EINVAL; -- 1.5.3.7 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: ip neigh show not showing arp cache entries?
Chris Friesen a écrit : I retested it on an x86 machine and am seeing similar problems. First, arp gives the arp table as expected: [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 arp -n Address HWtype HWaddress Flags MaskIface 172.24.0.9 ether 00:03:CC:51:06:5E Cbond0 10.41.18.101 ether 00:0E:0C:5E:95:BD Ceth6 172.24.137.0 ether 00:C0:8B:08:E4:88 Cbond0 172.24.136.0 ether 00:C0:8B:07:B3:7E Cbond0 10.41.18.1 ether 00:00:5E:00:01:01 Ceth6 172.24.0.5 ether 00:01:AF:15:E0:6A Cbond0 172.24.0.13 ether 00:0E:0C:85:FD:D2 Cbond0 172.24.0.3 ether 00:01:AF:14:C8:CC Cbond0 172.24.132.1 ether 00:01:AF:14:E9:88 Cbond0 172.24.0.7 ether 00:07:E9:41:4B:B4 Cbond0 192.168.24.81ether 00:01:AF:14:E9:8A Cbond2 ip neigh show gives nothing, but if I search for specific addresses from the arp table listing they show up: [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 ip neigh show [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 ip neigh show 172.24.0.9 172.24.0.9 dev bond0 lladdr 00:03:cc:51:06:5e DELAY [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 ip neigh show 10.41.18.101 10.41.18.101 dev eth6 lladdr 00:0e:0c:5e:95:bd REACHABLE [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 ip neigh show 172.24.137.0 172.24.137.0 dev bond0 lladdr 00:c0:8b:08:e4:88 REACHABLE Is this expected behaviour? Probably not... Still a 2.6.14 kernel ? Could you send the result of : strace ip neigh show -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 8/8] [PATCH v2] [CCID3]: Interface CCID3 code with newer Loss Intervals Database
| +static struct kmem_cache *tfrc_lh_slab __read_mostly;/* === */ | | Yup, this one, is introduced as above but is not initialized at the | module init routine, please see, it should be OK and we can move on: | | http://git.kernel.org/?p=linux/kernel/git/acme/net-2.6.25.git;a=commitdiff;h=a925429ce2189b548dc19037d3ebd4ff35ae4af7 | Sorry for the confusion - you were right, the initialisation was sitting in the wrong patch, not the one in the subject line. In your online version the problem is fixed. Thanks a lot for all the work and for the clarification. Gerrit -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] ehea: kdump support using new shutdown hook
On Wed, Dec 12, 2007 at 05:53:43PM +0100, Thomas Klein wrote: +static void ehea_update_adapter_handles(struct ehea_adapter *adapter) +{ +int i, k; +int j = 0; + +memset(adapter->res_handles, sizeof(adapter->res_handles), 0); The memset() arguments are the wrong way around: the fill value and the size are swapped. Dave -- http://www.codemonkey.org.uk -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 8/8] [PATCH v2] [CCID3]: Interface CCID3 code with newer Loss Intervals Database
Em Wed, Dec 12, 2007 at 04:56:32PM +, Gerrit Renker escreveu: | This time around I'm not doing any reordering, just trying to use your | patches as is, but adding this patch as-is produces a kernel that will | crash, no? | | The loss history and the RX/TX packet history slabs are all created in | tfrc.c using the three different __init routines of the dccp_tfrc_lib. | | Yes, the init routines are called and in turn they create the slab | caches, but up to the patch [PATCH 8/8] [PATCH v2] [CCID3]: Interface | CCID3 code with newer Loss Intervals Database the new li slab is not | being created, no? See what I'm talking? | Sorry, there is some weird kind of mix-up going on. Can you please check your patch set: it seems this email exchange refers to an older variant. In the most recent patch set, the slab is introduced in the patch [TFRC]: Ringbuffer to track loss interval history --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -27,6 +23,54 @@ struct dccp_li_hist_entry { u32 dccplih_interval; }; +static struct kmem_cache *tfrc_lh_slab __read_mostly; /* === */ Yup, this one, is introduced as above but is not initialized at the module init routine, please see, it should be OK and we can move on: http://git.kernel.org/?p=linux/kernel/git/acme/net-2.6.25.git;a=commitdiff;h=a925429ce2189b548dc19037d3ebd4ff35ae4af7 +/* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */ +static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 }; // ... And this is 6/8, i.e. before 8/8, cf. http://www.mail-archive.com/[EMAIL PROTECTED]/msg03000.html I don't know which tree you are working off, would it be possible to check against the test tree git://eden-feed.erg.abdn.ac.uk/dccp_exp [dccp] I'm doing a fresh clone now. But I think that everything is OK after today's merge request I sent to David. 
- Arnaldo -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: RFC: igb: Intel 82575 gigabit ethernet driver (take #2)
Kok, Auke wrote: All, here is the second version of the igb (82575) ethernet controller driver. This driver was previously posted 2007-07-13. Many comments received were addressed: - removed indirection wrappers in the same way as e1000e and ixgbe. - cleaned up largely against sparse, checkpatch - removed module parameters and moved functionality to ethtool ioctls - new NAPI API rewrites - by default the driver runs in multiqueue mode with 2 to 40 RX queues enabled. Since the driver is still too large (although the patch shrunk from 558k to 416k, almost 34% of its size) to post to this list I am attaching the bzipped patch here. You can get the same driver alternatively from here: http://foo-projects.org/~sofar/0001-igb-PCI-Express-82575-Gigabit-Ethernet-driver.patch [416k] http://foo-projects.org/~sofar/0001-igb-PCI-Express-82575-Gigabit-Ethernet-driver.patch.bz2 [74k] or through git: git://lost.foo-projects.org/~ahkok/git/linux-2.6 #igb There are several concerns still open for this driver: - namespace collisions with e1000. Since there are cleanups planned for e1000 since pci-e hardware is now moved to e1000e, this might resolve them. - hardware code is still a large API. We're expecting more hardware to be supported by this driver in the future and it's not certain which parts we need to keep or not. Unfortunately a last-minute effort of mine inserted a stray character. Please re-download the patch files from http or through git to get the updated patch that fixes this issue. The changes needed are below. 
Cheers, Auke --- diff --git a/drivers/net/igb/e1000_phy.c b/drivers/net/igb/e1000_phy.c index e57222a..1c13156 100644 --- a/drivers/net/igb/e1000_phy.c +++ b/drivers/net/igb/e1000_phy.c @@ -1555,7 +1555,7 @@ s32 e1000_get_phy_info_igp(struct e1000_hw *hw) goto out; ret_val = hw-phy.ops.read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS, - ` data); + data); if (ret_val) goto out; -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 2.6.24-rc4-mm1
Ilpo Järvinen wrote: On Wed, 5 Dec 2007, David Miller wrote: From: Reuben Farrelly [EMAIL PROTECTED] Date: Thu, 06 Dec 2007 17:59:37 +1100 On 5/12/2007 4:17 PM, Andrew Morton wrote: - Lots of device IDs have been removed from the e1000 driver and moved over to e1000e. So if your e1000 stops working, you forgot to set CONFIG_E1000E. This non fatal oops which I have just noticed may be related to this change then - certainly looks networking related. WARNING: at net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #1 Call Trace: IRQ [8046e038] tcp_fastretrans_alert+0x229/0xe63 [80470975] tcp_ack+0xa3f/0x127d [804747b7] tcp_rcv_established+0x55f/0x7f8 [8047b1aa] tcp_v4_do_rcv+0xdb/0x3a7 [881148a8] :nf_conntrack:nf_ct_deliver_cached_events+0x75/0x99 No, it's from TCP assertions and changes added by Ilpo to the net-2.6.25 tree recently. Yeah, this (very likely) due to the new SACK processing (in net-2.6.25). I'll look what could go wrong with fack_count calculations, most likely it's the reason (I've found earlier one out-of-place retransmission segment in one of my test case which already indicated that there's something incorrect with them but didn't have time to debug it yet). Thanks for report. Some info about how easily you can reproduce couple of sentences about the test case might be useful later on when evaluating the fix. I also got plenty of these when untaring a tarball on NFS. C. 
WARNING: at /home/legoater/linux/2.6.24-rc4-mm1/net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #2 Call Trace: IRQ [804115bf] tcp_fastretrans_alert+0xb6/0xbf2 [80413f30] tcp_ack+0xdf3/0xfbe [803da8fb] sk_reset_timer+0x17/0x23 [80416d1e] tcp_rcv_established+0xf3/0x76d [8041d231] tcp_v4_do_rcv+0x37/0x3aa [8041fb1f] tcp_v4_rcv+0x9a9/0xa76 [80402e4e] ip_local_deliver_finish+0x161/0x23c [80403363] ip_local_deliver+0x72/0x77 [80402ca9] ip_rcv_finish+0x371/0x3b5 [804032bd] ip_rcv+0x292/0x2c6 [803e3dcc] netif_receive_skb+0x267/0x340 [8806eff4] :tg3:tg3_poll+0x5d2/0x89e [803e639d] net_rx_action+0xd5/0x1ad [8023b605] __do_softirq+0x5f/0xe3 [8020c86c] call_softirq+0x1c/0x28 [8020e739] do_softirq+0x39/0x9f [8023b5a4] irq_exit+0x4e/0x50 [8020e880] do_IRQ+0xb7/0xd7 [8020a803] mwait_idle+0x0/0x55 [8020bb66] ret_from_intr+0x0/0xf EOI [8024d623] __atomic_notifier_call_chain+0x20/0x83 [8020a84b] mwait_idle+0x48/0x55 [80209e79] enter_idle+0x22/0x24 [8020a793] cpu_idle+0xa1/0xc5 [8021dfd5] start_secondary+0x3b9/0x3c5 WARNING: at /home/legoater/linux/2.6.24-rc4-mm1/net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #2 Call Trace: IRQ [804115bf] tcp_fastretrans_alert+0xb6/0xbf2 [80413f30] tcp_ack+0xdf3/0xfbe [804153b8] tcp_data_queue+0x5da/0xb0a [80416d1e] tcp_rcv_established+0xf3/0x76d [8041d231] tcp_v4_do_rcv+0x37/0x3aa [8041fb1f] tcp_v4_rcv+0x9a9/0xa76 [80402e4e] ip_local_deliver_finish+0x161/0x23c [80403363] ip_local_deliver+0x72/0x77 [80402ca9] ip_rcv_finish+0x371/0x3b5 [804032bd] ip_rcv+0x292/0x2c6 [803e3dcc] netif_receive_skb+0x267/0x340 [8806eff4] :tg3:tg3_poll+0x5d2/0x89e [803e639d] net_rx_action+0xd5/0x1ad [8023b605] __do_softirq+0x5f/0xe3 [8020c86c] call_softirq+0x1c/0x28 [8020e739] do_softirq+0x39/0x9f [8023b5a4] irq_exit+0x4e/0x50 [8020e880] do_IRQ+0xb7/0xd7 [8020a803] mwait_idle+0x0/0x55 [8020bb66] ret_from_intr+0x0/0xf EOI [8024d623] __atomic_notifier_call_chain+0x20/0x83 
[8020a84b] mwait_idle+0x48/0x55 [80209e79] enter_idle+0x22/0x24 [8020a793] cpu_idle+0xa1/0xc5 [8021dfd5] start_secondary+0x3b9/0x3c5 WARNING: at /home/legoater/linux/2.6.24-rc4-mm1/net/ipv4/tcp_input.c:2518 tcp_fastretrans_alert() Pid: 0, comm: swapper Not tainted 2.6.24-rc4-mm1 #2 Call Trace: IRQ [804115bf] tcp_fastretrans_alert+0xb6/0xbf2 [80413f30] tcp_ack+0xdf3/0xfbe [804153b8] tcp_data_queue+0x5da/0xb0a [80416d1e] tcp_rcv_established+0xf3/0x76d [8041d231] tcp_v4_do_rcv+0x37/0x3aa [8041fb1f] tcp_v4_rcv+0x9a9/0xa76 [80402e4e] ip_local_deliver_finish+0x161/0x23c [80403363] ip_local_deliver+0x72/0x77 [80402ca9] ip_rcv_finish+0x371/0x3b5 [804032bd] ip_rcv+0x292/0x2c6 [803e3dcc] netif_receive_skb+0x267/0x340 [8806eff4] :tg3:tg3_poll+0x5d2/0x89e [803e639d] net_rx_action+0xd5/0x1ad [8023b605] __do_softirq+0x5f/0xe3 [8020c86c]
Re: [RFC] net: napi fix
David Miller wrote: From: Andrew Gallatin [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 12:29:23 -0500 Is the netif_running() check even required? No, it is not. When a device is brought down, one of the first things that happens is that we wait for all pending NAPI polls to complete, then block any new polls from starting. Great, thanks. I will submit a patch to remove the bogus check. This should fix myri10ge properly. Thank you, Drew -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 8/8] gianfar: Magic Packet and suspend/resume support.
Signed-off-by: Scott Wood [EMAIL PROTECTED] --- Jeff, can you ack this to go through Paul's tree (assuming nothing wrong with it)? drivers/net/gianfar.c | 137 - drivers/net/gianfar.h | 13 +++- drivers/net/gianfar_ethtool.c | 41 - 3 files changed, 185 insertions(+), 6 deletions(-) diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 0431e9e..2c1b8d5 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -142,6 +142,7 @@ int gfar_clean_rx_ring(struct net_device *dev, int rx_work_limit); static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb, int length); static void gfar_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp); +static void gfar_halt_nodisable(struct net_device *dev); void gfar_halt(struct net_device *dev); void gfar_start(struct net_device *dev); static void gfar_clear_exact_match(struct net_device *dev); @@ -216,6 +217,7 @@ static int gfar_probe(struct platform_device *pdev) spin_lock_init(priv-txlock); spin_lock_init(priv-rxlock); + spin_lock_init(priv-bflock); platform_set_drvdata(pdev, dev); @@ -393,6 +395,122 @@ static int gfar_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM +static int gfar_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct gfar_private *priv = netdev_priv(dev); + unsigned long flags; + u32 tempval; + + int magic_packet = priv-wol_en + (priv-einfo-device_flags FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); + + netif_device_detach(dev); + + if (netif_running(dev)) { + spin_lock_irqsave(priv-txlock, flags); + spin_lock(priv-rxlock); + + gfar_halt_nodisable(dev); + + /* Disable Tx, and Rx if wake-on-LAN is disabled. 
*/ + tempval = gfar_read(priv-regs-maccfg1); + + tempval = ~MACCFG1_TX_EN; + + if (!magic_packet) + tempval = ~MACCFG1_RX_EN; + + gfar_write(priv-regs-maccfg1, tempval); + + spin_unlock(priv-rxlock); + spin_unlock_irqrestore(priv-txlock, flags); + +#ifdef CONFIG_GFAR_NAPI + napi_disable(priv-napi); +#endif + + if (magic_packet) { + /* Enable interrupt on Magic Packet */ + gfar_write(priv-regs-imask, IMASK_MAG); + + /* Enable Magic Packet mode */ + tempval = gfar_read(priv-regs-maccfg2); + tempval |= MACCFG2_MPEN; + gfar_write(priv-regs-maccfg2, tempval); + } else { + phy_stop(priv-phydev); + } + } + + if (!magic_packet || !netif_running(dev)) { + /* The device with the MDIO in its register block must +* not be put to sleep if any other network devices +* using the same MDIO are active. Ideally, some sort +* of reference counting could be done, but for now +* just don't put the MDIO-containing dev to sleep +* at all. +*/ + if (!(priv-einfo-device_flags FSL_GIANFAR_DEV_HAS_MDIO)) { + fsl_sleep_device(priv-einfo-sleep); + priv-suspended = 1; + } + } + + return 0; +} + +static int gfar_resume(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct gfar_private *priv = netdev_priv(dev); + unsigned long flags; + u32 tempval; + int magic_packet = priv-wol_en + (priv-einfo-device_flags FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); + + if (priv-suspended) { + fsl_wake_device(priv-einfo-sleep); + priv-suspended = 0; + } + + if (!netif_running(dev)) { + netif_device_attach(dev); + return 0; + } + + if (!magic_packet priv-phydev) + phy_start(priv-phydev); + + /* Disable Magic Packet mode, in case something +* else woke us up. 
+*/ + + spin_lock_irqsave(priv-txlock, flags); + spin_lock(priv-rxlock); + + tempval = gfar_read(priv-regs-maccfg2); + tempval = ~MACCFG2_MPEN; + gfar_write(priv-regs-maccfg2, tempval); + + gfar_start(dev); + + spin_unlock(priv-rxlock); + spin_unlock_irqrestore(priv-txlock, flags); + + netif_device_attach(dev); + +#ifdef CONFIG_GFAR_NAPI + napi_enable(priv-napi); +#endif + + return 0; +} +#else +#define gfar_suspend NULL +#define gfar_resume NULL +#endif /* Reads the controller's registers to determine what interface * connects it to the PHY. @@ -547,7 +665,7 @@
Re: [RFC] net: napi fix
[I apologize for losing threading, I'm replying from the archives] The problem is that the driver is doing a NAPI completion and re-enabling chip interrupts with work_done == weight, and that is illegal. The only time at least myri10ge will do this is due to the !netif_running(netdev) check. E.g., from myri10ge's poll: work_done = myri10ge_clean_rx_done(mgp, budget); if (work_done < budget || !netif_running(netdev)) { netif_rx_complete(netdev, napi); put_be32(htonl(3), mgp->irq_claim); } Is the netif_running() check even required? Is this just a bad way to solve a race with running NAPI at down() time that would be better solved by putting a napi_synchronize() in the driver's down() routine? I'd rather fix this right than add another check to a questionable code path. Thanks, Drew -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] net: napi fix
From: Andrew Gallatin [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 12:29:23 -0500 Is the netif_running() check even required? No, it is not. When a device is brought down, one of the first things that happens is that we wait for all pending NAPI polls to complete, then block any new polls from starting. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Bugme-new] [Bug 9543] New: RTNL: assertion failed at net/ipv6/addrconf.c (2164)/RTNL: assertion failed at net/ipv4/devinet.c (1055)
Herbert Xu [EMAIL PROTECTED] wrote: diff -puN drivers/net/bonding/bond_sysfs.c~bonding-locking-fix drivers/net/bonding/bond_sysfs.c --- a/drivers/net/bonding/bond_sysfs.c~bonding-locking-fix +++ a/drivers/net/bonding/bond_sysfs.c @@ -,8 +,6 @@ static ssize_t bonding_store_primary(str out: write_unlock_bh(bond-lock); - rtnl_unlock(); - Looking at the changeset that added this perhaps the intention is to hold the lock? If so we should add an rtnl_lock to the start of the function. Yes, this function needs to hold locks, and more than just what's there now. I believe the following should be correct; I haven't tested it, though (I'm supposedly on vacation right now). The following change should be correct for the bonding_store_primary case discussed in this thread, and also corrects the bonding_store_active case which performs similar functions. The bond_change_active_slave and bond_select_active_slave functions both require rtnl, bond-lock for read and curr_slave_lock for write_bh, and no other locks. This is so that the lower level mode-specific functions can release locks down to just rtnl in order to call, e.g., dev_set_mac_address with the locks it expects (rtnl only). 
Signed-off-by: Jay Vosburgh [EMAIL PROTECTED] diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 11b76b3..28a2d80 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -1075,7 +1075,10 @@ static ssize_t bonding_store_primary(struct device *d, struct slave *slave; struct bonding *bond = to_bond(d); - write_lock_bh(bond-lock); + rtnl_lock(); + read_lock(bond-lock); + write_lock_bh(bond-curr_slave_lock); + if (!USES_PRIMARY(bond-params.mode)) { printk(KERN_INFO DRV_NAME : %s: Unable to set primary slave; %s is in mode %d\n, @@ -1109,8 +1112,8 @@ static ssize_t bonding_store_primary(struct device *d, } } out: - write_unlock_bh(bond-lock); - + write_unlock_bh(bond-curr_slave_lock); + read_unlock(bond-lock); rtnl_unlock(); return count; @@ -1190,7 +1193,8 @@ static ssize_t bonding_store_active_slave(struct device *d, struct bonding *bond = to_bond(d); rtnl_lock(); - write_lock_bh(bond-lock); + read_lock(bond-lock); + write_lock_bh(bond-curr_slave_lock); if (!USES_PRIMARY(bond-params.mode)) { printk(KERN_INFO DRV_NAME @@ -1247,7 +1251,8 @@ static ssize_t bonding_store_active_slave(struct device *d, } } out: - write_unlock_bh(bond-lock); + write_unlock_bh(bond-curr_slave_lock); + read_unlock(bond-lock); rtnl_unlock(); return count; -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/8][BNX2]: Add ring constants.
[BNX2]: Add ring constants. Define the various ring constants to make the code cleaner. Signed-off-by: Michael Chan [EMAIL PROTECTED] diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 4e7b46e..dfe50c2 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -468,8 +468,7 @@ bnx2_free_mem(struct bnx2 *bp) bp-stats_blk = NULL; } if (bp-tx_desc_ring) { - pci_free_consistent(bp-pdev, - sizeof(struct tx_bd) * TX_DESC_CNT, + pci_free_consistent(bp-pdev, TXBD_RING_SIZE, bp-tx_desc_ring, bp-tx_desc_mapping); bp-tx_desc_ring = NULL; } @@ -477,8 +476,7 @@ bnx2_free_mem(struct bnx2 *bp) bp-tx_buf_ring = NULL; for (i = 0; i bp-rx_max_ring; i++) { if (bp-rx_desc_ring[i]) - pci_free_consistent(bp-pdev, - sizeof(struct rx_bd) * RX_DESC_CNT, + pci_free_consistent(bp-pdev, RXBD_RING_SIZE, bp-rx_desc_ring[i], bp-rx_desc_mapping[i]); bp-rx_desc_ring[i] = NULL; @@ -492,30 +490,24 @@ bnx2_alloc_mem(struct bnx2 *bp) { int i, status_blk_size; - bp-tx_buf_ring = kzalloc(sizeof(struct sw_bd) * TX_DESC_CNT, - GFP_KERNEL); + bp-tx_buf_ring = kzalloc(SW_TXBD_RING_SIZE, GFP_KERNEL); if (bp-tx_buf_ring == NULL) return -ENOMEM; - bp-tx_desc_ring = pci_alloc_consistent(bp-pdev, - sizeof(struct tx_bd) * - TX_DESC_CNT, + bp-tx_desc_ring = pci_alloc_consistent(bp-pdev, TXBD_RING_SIZE, bp-tx_desc_mapping); if (bp-tx_desc_ring == NULL) goto alloc_mem_err; - bp-rx_buf_ring = vmalloc(sizeof(struct sw_bd) * RX_DESC_CNT * - bp-rx_max_ring); + bp-rx_buf_ring = vmalloc(SW_RXBD_RING_SIZE * bp-rx_max_ring); if (bp-rx_buf_ring == NULL) goto alloc_mem_err; - memset(bp-rx_buf_ring, 0, sizeof(struct sw_bd) * RX_DESC_CNT * - bp-rx_max_ring); + memset(bp-rx_buf_ring, 0, SW_RXBD_RING_SIZE * bp-rx_max_ring); for (i = 0; i bp-rx_max_ring; i++) { bp-rx_desc_ring[i] = - pci_alloc_consistent(bp-pdev, -sizeof(struct rx_bd) * RX_DESC_CNT, + pci_alloc_consistent(bp-pdev, RXBD_RING_SIZE, bp-rx_desc_mapping[i]); if (bp-rx_desc_ring[i] == NULL) goto alloc_mem_err; diff --git a/drivers/net/bnx2.h 
b/drivers/net/bnx2.h index 30ba366..e6a2153 100644 --- a/drivers/net/bnx2.h +++ b/drivers/net/bnx2.h @@ -6408,6 +6408,11 @@ struct sw_bd { DECLARE_PCI_UNMAP_ADDR(mapping) }; +#define SW_RXBD_RING_SIZE (sizeof(struct sw_bd) * RX_DESC_CNT) +#define RXBD_RING_SIZE (sizeof(struct rx_bd) * RX_DESC_CNT) +#define SW_TXBD_RING_SIZE (sizeof(struct sw_bd) * TX_DESC_CNT) +#define TXBD_RING_SIZE (sizeof(struct tx_bd) * TX_DESC_CNT) + /* Buffered flash (Atmel: AT45DB011B) specific information */ #define SEEPROM_PAGE_BITS 2 #define SEEPROM_PHY_PAGE_SIZE (1 SEEPROM_PAGE_BITS) -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/8][BNX2]: Add init. code to handle RX pages.
[BNX2]: Add init. code to handle RX pages. Add new fields to keep track of the pages and the page rings. Add functions to allocate and free pages. Signed-off-by: Michael Chan [EMAIL PROTECTED] diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index db3b750..38e8e31 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -483,6 +483,16 @@ bnx2_free_mem(struct bnx2 *bp) } vfree(bp-rx_buf_ring); bp-rx_buf_ring = NULL; + for (i = 0; i bp-rx_max_pg_ring; i++) { + if (bp-rx_pg_desc_ring[i]) + pci_free_consistent(bp-pdev, RXBD_RING_SIZE, + bp-rx_pg_desc_ring[i], + bp-rx_pg_desc_mapping[i]); + bp-rx_pg_desc_ring[i] = NULL; + } + if (bp-rx_pg_ring) + vfree(bp-rx_pg_ring); + bp-rx_pg_ring = NULL; } static int @@ -514,6 +524,25 @@ bnx2_alloc_mem(struct bnx2 *bp) } + if (bp-rx_pg_ring_size) { + bp-rx_pg_ring = vmalloc(SW_RXPG_RING_SIZE * +bp-rx_max_pg_ring); + if (bp-rx_pg_ring == NULL) + goto alloc_mem_err; + + memset(bp-rx_pg_ring, 0, SW_RXPG_RING_SIZE * + bp-rx_max_pg_ring); + } + + for (i = 0; i bp-rx_max_pg_ring; i++) { + bp-rx_pg_desc_ring[i] = + pci_alloc_consistent(bp-pdev, RXBD_RING_SIZE, +bp-rx_pg_desc_mapping[i]); + if (bp-rx_pg_desc_ring[i] == NULL) + goto alloc_mem_err; + + } + /* Combine status and statistics blocks into one allocation. 
*/ status_blk_size = L1_CACHE_ALIGN(sizeof(struct status_block)); bp-status_stats_size = status_blk_size + @@ -2195,6 +2224,42 @@ bnx2_set_mac_addr(struct bnx2 *bp) } static inline int +bnx2_alloc_rx_page(struct bnx2 *bp, u16 index) +{ + dma_addr_t mapping; + struct sw_pg *rx_pg = bp-rx_pg_ring[index]; + struct rx_bd *rxbd = + bp-rx_pg_desc_ring[RX_RING(index)][RX_IDX(index)]; + struct page *page = alloc_page(GFP_ATOMIC); + + if (!page) + return -ENOMEM; + mapping = pci_map_page(bp-pdev, page, 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + rx_pg-page = page; + pci_unmap_addr_set(rx_pg, mapping, mapping); + rxbd-rx_bd_haddr_hi = (u64) mapping 32; + rxbd-rx_bd_haddr_lo = (u64) mapping 0x; + return 0; +} + +static void +bnx2_free_rx_page(struct bnx2 *bp, u16 index) +{ + struct sw_pg *rx_pg = bp-rx_pg_ring[index]; + struct page *page = rx_pg-page; + + if (!page) + return; + + pci_unmap_page(bp-pdev, pci_unmap_addr(rx_pg, mapping), PAGE_SIZE, + PCI_DMA_FROMDEVICE); + + __free_page(page); + rx_pg-page = NULL; +} + +static inline int bnx2_alloc_rx_skb(struct bnx2 *bp, u16 index) { struct sk_buff *skb; @@ -4213,11 +4278,31 @@ bnx2_init_rx_ring(struct bnx2 *bp) bp-rx_prod = 0; bp-rx_cons = 0; bp-rx_prod_bseq = 0; + bp-rx_pg_prod = 0; + bp-rx_pg_cons = 0; bnx2_init_rxbd_rings(bp-rx_desc_ring, bp-rx_desc_mapping, bp-rx_buf_use_size, bp-rx_max_ring); CTX_WR(bp, rx_cid_addr, BNX2_L2CTX_PG_BUF_SIZE, 0); + if (bp-rx_pg_ring_size) { + bnx2_init_rxbd_rings(bp-rx_pg_desc_ring, +bp-rx_pg_desc_mapping, +PAGE_SIZE, bp-rx_max_pg_ring); + val = (bp-rx_buf_use_size 16) | PAGE_SIZE; + CTX_WR(bp, rx_cid_addr, BNX2_L2CTX_PG_BUF_SIZE, val); + CTX_WR(bp, rx_cid_addr, BNX2_L2CTX_RBDC_KEY, + BNX2_L2CTX_RBDC_JUMBO_KEY); + + val = (u64) bp-rx_pg_desc_mapping[0] 32; + CTX_WR(bp, rx_cid_addr, BNX2_L2CTX_NX_PG_BDHADDR_HI, val); + + val = (u64) bp-rx_pg_desc_mapping[0] 0x; + CTX_WR(bp, rx_cid_addr, BNX2_L2CTX_NX_PG_BDHADDR_LO, val); + + if (CHIP_NUM(bp) == CHIP_NUM_5709) + REG_WR(bp, BNX2_MQ_MAP_L2_3, 
BNX2_MQ_MAP_L2_3_DEFAULT); + } val = BNX2_L2CTX_CTX_TYPE_CTX_BD_CHN_TYPE_VALUE; val |= BNX2_L2CTX_CTX_TYPE_SIZE_L2; @@ -4230,6 +4315,15 @@ bnx2_init_rx_ring(struct bnx2 *bp) val = (u64) bp-rx_desc_mapping[0] 0x; CTX_WR(bp, rx_cid_addr, BNX2_L2CTX_NX_BDHADDR_LO, val); + ring_prod = prod = bp-rx_pg_prod; + for (i = 0; i bp-rx_pg_ring_size; i++) { + if (bnx2_alloc_rx_page(bp, ring_prod) 0) + break; + prod = NEXT_RX_BD(prod); + ring_prod = RX_PG_RING_IDX(prod); + } + bp-rx_pg_prod = prod; + ring_prod = prod = bp-rx_prod;
Re: [kvm-devel] [PATCH resent] virtio_net: Fix stalled inbound traffic on early packets
Am Mittwoch, 12. Dezember 2007 schrieb Dor Laor: I think the change below handles the race. Otherwise please detail the use case. [...] @@ -292,6 +292,9 @@ static int virtnet_open(struct net_devic return -ENOMEM; napi_enable(&vi->napi); + + vi->rvq->vq_ops->enable(vi->rvq); + vi->svq->vq_ops->enable(vi->svq); If you change it to: if (!vi->rvq->vq_ops->enable(vi->rvq)) vi->rvq->vq_ops->kick(vi->rvq); if (!vi->rvq->vq_ops->enable(vi->svq)) vi->rvq->vq_ops->kick(vi->svq); You solve the race of packets already waiting in the queue without triggering the irq. Hmm, I don't fully understand your point. I think this will work as long as the host has not consumed all inbound buffers. It will also require that the host sends an additional packet, no? If no additional packet comes, the host has no reason to send an interrupt just because it got a notify hypercall. A kick inside a guest also does not trigger the poll routine. It also won't work in the following scenario: in virtnet open we will allocate buffers and send them to the host using the kick callback. The host can now use _all_ buffers for incoming data while interrupts are still disabled and the guest is not running. (Let's say the host bridge has lots of multicast traffic and the guest does not get scheduled for a while.) When the guest now continues and enables the interrupts, nothing happens. Doing a kick does not help, as the host code will bail out with no dma memory for transfer. Christian -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/8][BNX2]: Add S/G jumbo RX support.
[BNX2]: Add S/G jumbo RX support. David, 8 patches will follow to implement S/G for jumbo RX packets. Please review for 2.6.25. Thanks. Signed-off-by: Michael Chan [EMAIL PROTECTED] --- drivers/net/bnx2.c | 510 +++- drivers/net/bnx2.h | 43 +- drivers/net/bnx2_fw.h | 7034 +--- drivers/net/bnx2_fw2.h | 8416 +--- 4 files changed, 9301 insertions(+), 6702 deletions(-) --- [PATCH 1/8][BNX2]: Add ring constants. [PATCH 2/8][BNX2]: Restructure RX fast path handling. [PATCH 3/8][BNX2]: Restructure RX ring init. code. [PATCH 4/8 gzipped][BNX2]: Update firmware to support S/G RX buffers. [PATCH 5/8][BNX2]: Add init. code to handle RX pages. [PATCH 6/8][BNX2]: Add fast path code to handle RX pages. [PATCH 7/8][BNX2]: Enable S/G for jumbo RX. [PATCH 8/8][BNX2]: Update version to 1.6.9. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/8][BNX2]: Restructure RX fast path handling.
[BNX2]: Restructure RX fast path handling. Add a new function to handle new SKB allocation and to prepare the completed SKB. This makes it easier to add support for non-linear SKB. Signed-off-by: Michael Chan [EMAIL PROTECTED] diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index dfe50c2..14119fb 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -2379,6 +2379,27 @@ bnx2_reuse_rx_skb(struct bnx2 *bp, struct sk_buff *skb, prod_bd-rx_bd_haddr_lo = cons_bd-rx_bd_haddr_lo; } +static int +bnx2_rx_skb(struct bnx2 *bp, struct sk_buff *skb, unsigned int len, + dma_addr_t dma_addr, u32 ring_idx) +{ + int err; + u16 prod = ring_idx 0x; + + err = bnx2_alloc_rx_skb(bp, prod); + if (unlikely(err)) { + bnx2_reuse_rx_skb(bp, skb, (u16) (ring_idx 16), prod); + return err; + } + + skb_reserve(skb, bp-rx_offset); + pci_unmap_single(bp-pdev, dma_addr, bp-rx_buf_use_size, +PCI_DMA_FROMDEVICE); + + skb_put(skb, len); + return 0; +} + static inline u16 bnx2_get_hw_rx_cons(struct bnx2 *bp) { @@ -2434,7 +2455,8 @@ bnx2_rx_int(struct bnx2 *bp, int budget) L2_FHDR_ERRORS_TOO_SHORT | L2_FHDR_ERRORS_GIANT_FRAME)) { - goto reuse_rx; + bnx2_reuse_rx_skb(bp, skb, sw_ring_cons, sw_ring_prod); + goto next_rx; } /* Since we don't have a jumbo ring, copy small packets @@ -2444,8 +2466,11 @@ bnx2_rx_int(struct bnx2 *bp, int budget) struct sk_buff *new_skb; new_skb = netdev_alloc_skb(bp-dev, len + 2); - if (new_skb == NULL) - goto reuse_rx; + if (new_skb == NULL) { + bnx2_reuse_rx_skb(bp, skb, sw_ring_cons, + sw_ring_prod); + goto next_rx; + } /* aligned copy */ skb_copy_from_linear_data_offset(skb, bp-rx_offset - 2, @@ -2457,20 +2482,9 @@ bnx2_rx_int(struct bnx2 *bp, int budget) sw_ring_cons, sw_ring_prod); skb = new_skb; - } - else if (bnx2_alloc_rx_skb(bp, sw_ring_prod) == 0) { - pci_unmap_single(bp-pdev, dma_addr, - bp-rx_buf_use_size, PCI_DMA_FROMDEVICE); - - skb_reserve(skb, bp-rx_offset); - skb_put(skb, len); - } - else { -reuse_rx: - bnx2_reuse_rx_skb(bp, skb, - sw_ring_cons, 
sw_ring_prod); + } else if (unlikely(bnx2_rx_skb(bp, skb, len, dma_addr, + (sw_ring_cons 16) | sw_ring_prod))) goto next_rx; - } skb-protocol = eth_type_trans(skb, bp-dev); -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/8][BNX2]: Restructure RX ring init. code.
[BNX2]: Restructure RX ring init. code. Factor out the common functions that will be used to initialize the normal RX rings and the page rings. Change the copybreak constant RX_COPY_THRESH to 128. This same constant will be used for the max. size of the linear SKB when pages are used. Copybreak will be turned off when pages are used. Signed-off-by: Michael Chan [EMAIL PROTECTED] diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 14119fb..81971b1 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -2117,15 +2117,12 @@ bnx2_init_context(struct bnx2 *bp) vcid_addr += (i PHY_CTX_SHIFT); pcid_addr += (i PHY_CTX_SHIFT); - REG_WR(bp, BNX2_CTX_VIRT_ADDR, 0x00); + REG_WR(bp, BNX2_CTX_VIRT_ADDR, vcid_addr); REG_WR(bp, BNX2_CTX_PAGE_TBL, pcid_addr); /* Zero out the context. */ for (offset = 0; offset PHY_CTX_SIZE; offset += 4) - CTX_WR(bp, 0x00, offset, 0); - - REG_WR(bp, BNX2_CTX_VIRT_ADDR, vcid_addr); - REG_WR(bp, BNX2_CTX_PAGE_TBL, pcid_addr); + CTX_WR(bp, vcid_addr, offset, 0); } } } @@ -2459,10 +2456,7 @@ bnx2_rx_int(struct bnx2 *bp, int budget) goto next_rx; } - /* Since we don't have a jumbo ring, copy small packets -* if mtu 1500 -*/ - if ((bp-dev-mtu 1500) (len = RX_COPY_THRESH)) { + if (len = bp-rx_copy_thresh) { struct sk_buff *new_skb; new_skb = netdev_alloc_skb(bp-dev, len + 2); @@ -4172,50 +4166,57 @@ bnx2_init_tx_ring(struct bnx2 *bp) } static void -bnx2_init_rx_ring(struct bnx2 *bp) +bnx2_init_rxbd_rings(struct rx_bd *rx_ring[], dma_addr_t dma[], u32 buf_size, +int num_rings) { - struct rx_bd *rxbd; int i; - u16 prod, ring_prod; - u32 val; - - /* 8 for CRC and VLAN */ - bp-rx_buf_use_size = bp-dev-mtu + ETH_HLEN + bp-rx_offset + 8; - /* hw alignment */ - bp-rx_buf_size = bp-rx_buf_use_size + BNX2_RX_ALIGN; - - ring_prod = prod = bp-rx_prod = 0; - bp-rx_cons = 0; - bp-rx_prod_bseq = 0; + struct rx_bd *rxbd; - for (i = 0; i bp-rx_max_ring; i++) { + for (i = 0; i num_rings; i++) { int j; - rxbd = bp-rx_desc_ring[i][0]; + rxbd = rx_ring[i][0]; for 
(j = 0; j MAX_RX_DESC_CNT; j++, rxbd++) { - rxbd-rx_bd_len = bp-rx_buf_use_size; + rxbd-rx_bd_len = buf_size; rxbd-rx_bd_flags = RX_BD_FLAGS_START | RX_BD_FLAGS_END; } - if (i == (bp-rx_max_ring - 1)) + if (i == (num_rings - 1)) j = 0; else j = i + 1; - rxbd-rx_bd_haddr_hi = (u64) bp-rx_desc_mapping[j] 32; - rxbd-rx_bd_haddr_lo = (u64) bp-rx_desc_mapping[j] - 0x; + rxbd-rx_bd_haddr_hi = (u64) dma[j] 32; + rxbd-rx_bd_haddr_lo = (u64) dma[j] 0x; } +} + +static void +bnx2_init_rx_ring(struct bnx2 *bp) +{ + int i; + u16 prod, ring_prod; + u32 val, rx_cid_addr = GET_CID_ADDR(RX_CID); + + bp-rx_prod = 0; + bp-rx_cons = 0; + bp-rx_prod_bseq = 0; + + bnx2_init_rxbd_rings(bp-rx_desc_ring, bp-rx_desc_mapping, +bp-rx_buf_use_size, bp-rx_max_ring); + + CTX_WR(bp, rx_cid_addr, BNX2_L2CTX_PG_BUF_SIZE, 0); val = BNX2_L2CTX_CTX_TYPE_CTX_BD_CHN_TYPE_VALUE; val |= BNX2_L2CTX_CTX_TYPE_SIZE_L2; val |= 0x02 8; - CTX_WR(bp, GET_CID_ADDR(RX_CID), BNX2_L2CTX_CTX_TYPE, val); + CTX_WR(bp, rx_cid_addr, BNX2_L2CTX_CTX_TYPE, val); val = (u64) bp-rx_desc_mapping[0] 32; - CTX_WR(bp, GET_CID_ADDR(RX_CID), BNX2_L2CTX_NX_BDHADDR_HI, val); + CTX_WR(bp, rx_cid_addr, BNX2_L2CTX_NX_BDHADDR_HI, val); val = (u64) bp-rx_desc_mapping[0] 0x; - CTX_WR(bp, GET_CID_ADDR(RX_CID), BNX2_L2CTX_NX_BDHADDR_LO, val); + CTX_WR(bp, rx_cid_addr, BNX2_L2CTX_NX_BDHADDR_LO, val); + ring_prod = prod = bp-rx_prod; for (i = 0; i bp-rx_ring_size; i++) { if (bnx2_alloc_rx_skb(bp, ring_prod) 0) { break; @@ -4230,26 +4231,40 @@ bnx2_init_rx_ring(struct bnx2 *bp) REG_WR(bp, MB_RX_CID_ADDR + BNX2_L2CTX_HOST_BSEQ, bp-rx_prod_bseq); } -static void -bnx2_set_rx_ring_size(struct bnx2 *bp, u32 size) +static u32 bnx2_find_max_ring(u32 ring_size, u32 max_size) { - u32 num_rings, max; + u32 max, num_rings = 1; - bp-rx_ring_size =
[PATCH 6/8][BNX2]: Add fast path code to handle RX pages.
[BNX2]: Add fast path code to handle RX pages. Add function to reuse a page in case of allocation or other errors. Add code to construct the completed SKB with the additional data in the pages. Signed-off-by: Michael Chan [EMAIL PROTECTED] diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 38e8e31..6c0fc8a 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -2411,6 +2411,55 @@ bnx2_tx_int(struct bnx2 *bp) } } +static void +bnx2_reuse_rx_skb_pages(struct bnx2 *bp, struct sk_buff *skb, int count) +{ + struct sw_pg *cons_rx_pg, *prod_rx_pg; + struct rx_bd *cons_bd, *prod_bd; + dma_addr_t mapping; + int i; + u16 hw_prod = bp-rx_pg_prod, prod; + u16 cons = bp-rx_pg_cons; + + for (i = 0; i count; i++) { + prod = RX_PG_RING_IDX(hw_prod); + + prod_rx_pg = bp-rx_pg_ring[prod]; + cons_rx_pg = bp-rx_pg_ring[cons]; + cons_bd = bp-rx_pg_desc_ring[RX_RING(cons)][RX_IDX(cons)]; + prod_bd = bp-rx_pg_desc_ring[RX_RING(prod)][RX_IDX(prod)]; + + if (i == 0 skb) { + struct page *page; + struct skb_shared_info *shinfo; + + shinfo = skb_shinfo(skb); + shinfo-nr_frags--; + page = shinfo-frags[shinfo-nr_frags].page; + shinfo-frags[shinfo-nr_frags].page = NULL; + mapping = pci_map_page(bp-pdev, page, 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + cons_rx_pg-page = page; + pci_unmap_addr_set(cons_rx_pg, mapping, mapping); + dev_kfree_skb(skb); + } + if (prod != cons) { + prod_rx_pg-page = cons_rx_pg-page; + cons_rx_pg-page = NULL; + pci_unmap_addr_set(prod_rx_pg, mapping, + pci_unmap_addr(cons_rx_pg, mapping)); + + prod_bd-rx_bd_haddr_hi = cons_bd-rx_bd_haddr_hi; + prod_bd-rx_bd_haddr_lo = cons_bd-rx_bd_haddr_lo; + + } + cons = RX_PG_RING_IDX(NEXT_RX_BD(cons)); + hw_prod = NEXT_RX_BD(hw_prod); + } + bp-rx_pg_prod = hw_prod; + bp-rx_pg_cons = cons; +} + static inline void bnx2_reuse_rx_skb(struct bnx2 *bp, struct sk_buff *skb, u16 cons, u16 prod) @@ -2443,7 +2492,7 @@ bnx2_reuse_rx_skb(struct bnx2 *bp, struct sk_buff *skb, static int bnx2_rx_skb(struct bnx2 *bp, struct sk_buff *skb, 
unsigned int len, - dma_addr_t dma_addr, u32 ring_idx) + unsigned int hdr_len, dma_addr_t dma_addr, u32 ring_idx) { int err; u16 prod = ring_idx 0x; @@ -2451,6 +2500,12 @@ bnx2_rx_skb(struct bnx2 *bp, struct sk_buff *skb, unsigned int len, err = bnx2_alloc_rx_skb(bp, prod); if (unlikely(err)) { bnx2_reuse_rx_skb(bp, skb, (u16) (ring_idx 16), prod); + if (hdr_len) { + unsigned int raw_len = len + 4; + int pages = PAGE_ALIGN(raw_len - hdr_len) PAGE_SHIFT; + + bnx2_reuse_rx_skb_pages(bp, NULL, pages); + } return err; } @@ -2458,7 +2513,69 @@ bnx2_rx_skb(struct bnx2 *bp, struct sk_buff *skb, unsigned int len, pci_unmap_single(bp-pdev, dma_addr, bp-rx_buf_use_size, PCI_DMA_FROMDEVICE); - skb_put(skb, len); + if (hdr_len == 0) { + skb_put(skb, len); + return 0; + } else { + unsigned int i, frag_len, frag_size, pages; + struct sw_pg *rx_pg; + u16 pg_cons = bp-rx_pg_cons; + u16 pg_prod = bp-rx_pg_prod; + + frag_size = len + 4 - hdr_len; + pages = PAGE_ALIGN(frag_size) PAGE_SHIFT; + skb_put(skb, hdr_len); + + for (i = 0; i pages; i++) { + frag_len = min(frag_size, (unsigned int) PAGE_SIZE); + if (unlikely(frag_len = 4)) { + unsigned int tail = 4 - frag_len; + + bp-rx_pg_cons = pg_cons; + bp-rx_pg_prod = pg_prod; + bnx2_reuse_rx_skb_pages(bp, NULL, pages - i); + skb-len -= tail; + if (i == 0) { + skb-tail -= tail; + } else { + skb_frag_t *frag = + skb_shinfo(skb)-frags[i - 1]; + frag-size -= tail; + skb-data_len -= tail; + skb-truesize -= tail; +
[PATCH 7/8][BNX2]: Enable S/G for jumbo RX.
[BNX2]: Enable S/G for jumbo RX. If the MTU requires more than 1 page for the SKB, enable the page ring and calculate the size of the page ring. This will guarantee order-0 allocation regardless of the MTU size. Fixup loopback test packet size so that we don't deal with the pages during loopback test. Signed-off-by: Michael Chan [EMAIL PROTECTED] diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 6c0fc8a..ae081c8 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -4493,15 +4493,32 @@ static u32 bnx2_find_max_ring(u32 ring_size, u32 max_size) static void bnx2_set_rx_ring_size(struct bnx2 *bp, u32 size) { - u32 rx_size; + u32 rx_size, rx_space, jumbo_size; /* 8 for CRC and VLAN */ rx_size = bp-dev-mtu + ETH_HLEN + bp-rx_offset + 8; + rx_space = SKB_DATA_ALIGN(rx_size + BNX2_RX_ALIGN) + NET_SKB_PAD + + sizeof(struct skb_shared_info); + bp-rx_copy_thresh = RX_COPY_THRESH; bp-rx_pg_ring_size = 0; bp-rx_max_pg_ring = 0; bp-rx_max_pg_ring_idx = 0; + if (rx_space PAGE_SIZE) { + int pages = PAGE_ALIGN(bp-dev-mtu - 40) PAGE_SHIFT; + + jumbo_size = size * pages; + if (jumbo_size MAX_TOTAL_RX_PG_DESC_CNT) + jumbo_size = MAX_TOTAL_RX_PG_DESC_CNT; + + bp-rx_pg_ring_size = jumbo_size; + bp-rx_max_pg_ring = bnx2_find_max_ring(jumbo_size, + MAX_RX_PG_RINGS); + bp-rx_max_pg_ring_idx = (bp-rx_max_pg_ring * RX_DESC_CNT) - 1; + rx_size = RX_COPY_THRESH + bp-rx_offset; + bp-rx_copy_thresh = 0; + } bp-rx_buf_use_size = rx_size; /* hw alignment */ @@ -4881,7 +4898,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode) else return -EINVAL; - pkt_size = 1514; + pkt_size = min(bp-dev-mtu + ETH_HLEN, bp-rx_jumbo_thresh - 4); skb = netdev_alloc_skb(bp-dev, pkt_size); if (!skb) return -ENOMEM; -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 8/8][BNX2]: Update version to 1.6.9.
[BNX2]: Update version to 1.6.9. Signed-off-by: Michael Chan [EMAIL PROTECTED] diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index ae081c8..469d259 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -56,8 +56,8 @@ #define DRV_MODULE_NAMEbnx2 #define PFX DRV_MODULE_NAME: -#define DRV_MODULE_VERSION 1.6.9 -#define DRV_MODULE_RELDATE December 8, 2007 +#define DRV_MODULE_VERSION 1.7.0 +#define DRV_MODULE_RELDATE December 11, 2007 #define RUN_AT(x) (jiffies + (x)) -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: ip neigh show not showing arp cache entries?
Eric Dumazet wrote: Chris Friesen a écrit : Is this expected behaviour? Probably not... Still a 2.6.14 kernel ? Yep. Embedded hardware, so I'm unable to test with a more recent kernel. Could you send the result of : strace ip neigh show I've attached two strace runs, one of ip neigh show and one of ip neigh show 10.41.18.101. Chris [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 strace ip neigh show execve(/sbin/ip, [ip, neigh, show], [/* 14 vars */]) = 0 uname({sys=Linux, node=typhoon-base-unit0, ...}) = 0 brk(0) = 0x806b000 access(/etc/ld.so.preload, R_OK) = -1 ENOENT (No such file or directory) open(/etc/ld.so.cache, O_RDONLY) = 3 fstat64(3, {st_mode=S_IFREG|0644, st_size=58478, ...}) = 0 mmap2(NULL, 58478, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7f56000 close(3)= 0 open(/lib/libresolv.so.2, O_RDONLY) = 3 read(3, \177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\320\\0..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=75541, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f55000 mmap2(NULL, 71816, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7f43000 mmap2(0xb7f51000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xe) = 0xb7f51000 mmap2(0xb7f53000, 6280, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7f53000 close(3)= 0 open(/lib/libc.so.6, O_RDONLY)= 3 read(3, \177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\216O\1..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=1407983, ...}) = 0 mmap2(NULL, 1146076, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7e2b000 mmap2(0xb7f39000, 32768, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x10d) = 0xb7f39000 mmap2(0xb7f41000, 7388, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7f41000 close(3)= 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7e2a000 mprotect(0xb7f39000, 20480, PROT_READ) = 0 set_thread_area({entry_number:-1 - 6, 
base_addr:0xb7e2a6b0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}) = 0 munmap(0xb7f56000, 58478) = 0 socket(PF_NETLINK, SOCK_RAW, 0) = 3 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [32768], 4) = 0 bind(3, {sa_family=AF_NETLINK, pid=0, groups=}, 12) = 0 getsockname(3, {sa_family=AF_NETLINK, pid=6150, groups=}, [12]) = 0 time(NULL) = 1197465643 sendto(3, \24\0\0\0\22\0\1\3,\340_G\0\0\0\0\0\0\0\0, 20, 0, {sa_family=AF_NETLINK, pid=0, groups=}, 12) = 20 recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=}, msg_iov(1)=[{\364\0\0\0\20\0\2\0,\340_G\6\30\0\0\0\0\1\0\1\0\0\0C\30..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3544 brk(0) = 0x806b000 brk(0x808c000) = 0x808c000 recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=}, msg_iov(1)=[{\24\0\0\0\3\0\2\0,\340_G\6\30\0\0\0\0\0\0\1\0\0\0C\30\0..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 20 sendto(3, \24\0\0\0\36\0\1\3-\340_G\0\0\0\0\0\0\0\0, 20, 0, {sa_family=AF_NETLINK, pid=0, groups=}, 12) = 20 recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=}, msg_iov(1)=[{[EMAIL PROTECTED]..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 264 recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=}, msg_iov(1)=[{[EMAIL PROTECTED]..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 20 exit_group(0) = ? 
Process 6150 detached [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 ip neigh show 10.41.18.101 10.41.18.101 dev eth6 lladdr 00:0e:0c:5e:95:bd REACHABLE [EMAIL PROTECTED]:/tftpboot/cnp/0-0-5-0/0-0-5-0 strace ip neigh show 10.41.18.101 execve(/sbin/ip, [ip, neigh, show, 10.41.18.101], [/* 14 vars */]) = 0 uname({sys=Linux, node=typhoon-base-unit0, ...}) = 0 brk(0) = 0x806b000 access(/etc/ld.so.preload, R_OK) = -1 ENOENT (No such file or directory) open(/etc/ld.so.cache, O_RDONLY) = 3 fstat64(3, {st_mode=S_IFREG|0644, st_size=58478, ...}) = 0 mmap2(NULL, 58478, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7fd5000 close(3)= 0 open(/lib/libresolv.so.2, O_RDONLY) = 3 read(3, \177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\320\\0..., 512) = 512 fstat64(3, {st_mode=S_IFREG|0755, st_size=75541, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7fd4000 mmap2(NULL, 71816, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7fc2000 mmap2(0xb7fd, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xe) = 0xb7fd mmap2(0xb7fd2000, 6280, PROT_READ|PROT_WRITE,
Re: ip neigh show not showing arp cache entries?
Chris Friesen a écrit : Eric Dumazet wrote: Chris Friesen a écrit : Is this expected behaviour? Probably not... Still a 2.6.14 kernel ? Yep. Embedded hardware, so I'm unable to test with a more recent kernel. And what is the version of ip command you have on this machine ? ip -V You may try other versions of this command http://devresources.linux-foundation.org/dev/iproute2/download/ -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] netfilter : xt_hashlimit should use time_after_eq()
Hi David, Patrick, I believe this patch is needed for linux-2.6.24. Thank you. [PATCH] netfilter : xt_hashlimit should use time_after_eq() In order to avoid jiffies wraparound and its effect, special care must be taken when doing comparisons ... Signed-off-by: Eric Dumazet [EMAIL PROTECTED] diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 1910367..2ef44d8 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -240,7 +240,7 @@ static bool select_all(const struct xt_hashlimit_htable *ht, static bool select_gc(const struct xt_hashlimit_htable *ht, const struct dsthash_ent *he) { - return jiffies >= he->expires; + return time_after_eq(jiffies, he->expires); } static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
Re: [PATCH 1/3] [IPSEC]: Make xfrm_lookup flags argument a bit-field
From: Herbert Xu [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 09:57:59 +0800 [IPSEC]: Make xfrm_lookup flags argument a bit-field This patch introduces an enum for bits in the flags argument of xfrm_lookup. This is so that we can cram more information into it later. Since all current users use just the values 0 and 1, XFRM_LOOKUP_WAIT has been added with the value 1 << 0 to represent the current meaning of flags. The test in __xfrm_lookup has been changed accordingly. Signed-off-by: Herbert Xu [EMAIL PROTECTED] Begrudgingly, I've applied this, but only to make the merging of your work easier. I think this flag is totally wrong and you need to fix this up. This value is not an __xfrm_lookup() flag at all, and this issue would have been clear had you tried to fix up the __xfrm_lookup() call sites. There is no point in adding the named constant, if you keep the magic constants around, so it's wrong that you didn't hit all the call sites when you added this flag. This flag propagates from a level before __xfrm_lookup() in many cases, therefore it is a generic route lookup flag, not an XFRM layer specific one. ip_route_output_flow() is one of several example cases. It is going to get even more ugly when you add the other XFRM lookup flag in the follow-on changeset. Now we'll have a mixture of the magic constants '1' and '0', the named version XFRM_LOOKUP_WAIT, and this new XFRM_LOOKUP_* flag for reverse resolution. These values all exist in different contextual namespaces, yet you are allocating and naming them purely from the context of __xfrm_lookup() and that just isn't right. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] [IPSEC]: Added xfrm_decode_session_reverse and xfrmX_policy_check_reverse
From: Herbert Xu [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 09:58:00 +0800 [IPSEC]: Added xfrm_decode_session_reverse and xfrmX_policy_check_reverse RFC 4301 requires us to relookup ICMP traffic that does not match any policies using the reverse of its payload. This patch adds the functions xfrm_decode_session_reverse and xfrmX_policy_check_reverse so we can get the reverse flow to perform such a lookup. Signed-off-by: Herbert Xu [EMAIL PROTECTED] Applied. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] net: napi fix
David Miller wrote: From: Andrew Gallatin [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 12:29:23 -0500 Is the netif_running() check even required? No, it is not. When a device is brought down, one of the first things that happens is that we wait for all pending NAPI polls to complete, then block any new polls from starting. I think this was previously (pre-2.6.24) not the case, which is why e1000 et al has this check as well and that's exactly what is causing most of the net_rx_action oopses in the first place. Without the netif_running() check previously the drivers were just unusable with NAPI and prone to many races with down (i.e. touching some ethtool ioctl which wants to do a reset while routing small packets at high numbers). that's why we added the netif_running() check in the first place :) There might be more drivers lurking that need this change... Auke -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] [IPSEC]: Add ICMP host relookup support
From: Herbert Xu [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 09:58:01 +0800 [IPSEC]: Add ICMP host relookup support RFC 4301 requires us to relookup ICMP traffic that does not match any policies using the reverse of its payload. This patch implements this for ICMP traffic that originates from or terminates on localhost. This is activated on outbound with the new policy flag XFRM_POLICY_ICMP, and on inbound by the new state flag XFRM_STATE_ICMP. On inbound the policy check is now performed by the ICMP protocol so that it can repeat the policy check where necessary. Signed-off-by: Herbert Xu [EMAIL PROTECTED] ... @@ -268,6 +268,7 @@ extern void dst_init(void); /* Flags for xfrm_lookup flags argument. */ enum { XFRM_LOOKUP_WAIT = 1 << 0, + XFRM_LOOKUP_ICMP = 1 << 1, }; struct flowi; Sigh :-( Applied. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] HDLC driver: use unregister_netdev instead of unregister_netdevice
From: Wang Chen [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 10:35:56 +0800 [PATCH] HDLC driver: use unregister_netdev instead of unregister_netdevice Since the caller and the upper caller don't hold the rtnl semaphore, we should use unregister_netdev instead of unregister_netdevice. Signed-off-by: Wang Chen [EMAIL PROTECTED] Applied, thanks for finding and fixing this bug. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Net: Remove FASTCALL macro
From: Harvey Harrison [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 00:09:03 -0800 X86_32 was the last user of the FASTCALL macro, now that it uses regparm(3) by default, this macro expands to nothing. Signed-off-by: Harvey Harrison [EMAIL PROTECTED] Applied to net-2.6.25, thanks. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2.6.25] netns: network namespace was passed into dev_getbyhwaddr but not used
From: Denis V. Lunev [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 14:05:17 +0300 netns: network namespace was passed into dev_getbyhwaddr but not used Signed-off-by: Denis V. Lunev [EMAIL PROTECTED] Applied. Please always put the subsystem name in the headline in all capitals and surround it with brackets, like this: [NETNS]: Blah blah blah. Also treat the description line as a true English sentence, this means capitalize the first word and put a period at the end of it. This is a changelog message for a professional piece of software, not some innanet slang talk on some IRC channel. :-) I make these edits to every one of your patch submissions and it's getting tiring, I hoped you would notice this yourself before my needing to tell you about it explicitly :-) -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH][NETDEV]: remove netif_running() check from myri10ge_poll()
Remove the bogus netif_running() check from myri10ge_poll(). This eliminates any chance that myri10ge_poll() can trigger an oops by calling netif_rx_complete() and returning with work_done == budget. Signed-off-by: Andrew Gallatin [EMAIL PROTECTED] diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 8def865..c90958f 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -1239,7 +1239,7 @@ static int myri10ge_poll(struct napi_str /* process as many rx events as NAPI will allow */ work_done = myri10ge_clean_rx_done(mgp, budget); - if (work_done < budget || !netif_running(netdev)) { + if (work_done < budget) { netif_rx_complete(netdev, napi); put_be32(htonl(3), mgp->irq_claim); }
Re: [RFC] mac80211: clean up frame receive handling
On Dec 12, 2007 7:24 PM, Johannes Berg [EMAIL PROTECTED] wrote: [comments welcome. I really need a refresher on what the frame formats mean but I think I did the right thing with skb->protocol here, I also think we had a bug with rfc2042 header frames bigger than 15xx bytes and eth_type_trans()] This cleans up the frame receive handling. After this patch * EAPOL frames addressed to us or the EAPOL group address are always accepted regardless of whether they are encrypted or not Why? Userspace (wpa_supplicant) tries to control this depending on the network settings. (I don't really know why, though.) Maybe it should be handled like drop_unencrypted, with the default being to accept all? -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] XFRM: Display the audited SPI value in host byte order
From: Paul Moore [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 14:05:42 -0500 Currently the IPsec protocol SPI values are written to the audit log in network byte order which is different from almost all other values which are recorded in host byte order. This patch corrects this inconsistency by writing the SPI values to the audit record in host byte order. Signed-off-by: Paul Moore [EMAIL PROTECTED] Applied to net-2.6, thanks Paul. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Bugme-new] [Bug 9543] New: RTNL: assertion failed at net/ipv6/addrconf.c (2164)/RTNL: assertion failed at net/ipv4/devinet.c (1055)
On Wed, Dec 12, 2007 at 09:46:55AM -0800, Jay Vosburgh wrote: Herbert Xu [EMAIL PROTECTED] wrote: diff -puN drivers/net/bonding/bond_sysfs.c~bonding-locking-fix drivers/net/bonding/bond_sysfs.c --- a/drivers/net/bonding/bond_sysfs.c~bonding-locking-fix +++ a/drivers/net/bonding/bond_sysfs.c @@ -,8 +,6 @@ static ssize_t bonding_store_primary(str out: write_unlock_bh(&bond->lock); - rtnl_unlock(); - Looking at the changeset that added this perhaps the intention is to hold the lock? If so we should add an rtnl_lock to the start of the function. Yes, this function needs to hold locks, and more than just what's there now. I believe the following should be correct; I haven't tested it, though (I'm supposedly on vacation right now). The following change should be correct for the bonding_store_primary case discussed in this thread, and also corrects the bonding_store_active case which performs similar functions. The bond_change_active_slave and bond_select_active_slave functions both require rtnl, bond->lock for read and curr_slave_lock for write_bh, and no other locks. This is so that the lower level mode-specific functions can release locks down to just rtnl in order to call, e.g., dev_set_mac_address with the locks it expects (rtnl only). Signed-off-by: Jay Vosburgh [EMAIL PROTECTED] This looks good to me as well Signed-off-by: Andy Gospodarek [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] netfilter : xt_hashlimit should use time_after_eq()
Eric Dumazet wrote: Hi David Patrick I believe this patch is needed for linux-2.6.24 Thank you [PATCH] netfilter : xt_hashlimit should use time_after_eq() In order to avoid jiffies wraparound and its effect, special care must be taken when doing comparisons ... Looks good, thanks Eric. Dave, please apply. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] XFRM: Display the audited SPI value in host byte order
Currently the IPsec protocol SPI values are written to the audit log in network byte order which is different from almost all other values which are recorded in host byte order. This patch corrects this inconsistency by writing the SPI values to the audit record in host byte order. Signed-off-by: Paul Moore [EMAIL PROTECTED] --- net/xfrm/xfrm_state.c | 10 ++ 1 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index cf43c49..1af522b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2028,6 +2028,7 @@ void xfrm_audit_state_add(struct xfrm_state *x, int result, u32 auid, u32 sid) { struct audit_buffer *audit_buf; + u32 spi; extern int audit_enabled; if (audit_enabled == 0) @@ -2037,8 +2038,8 @@ xfrm_audit_state_add(struct xfrm_state *x, int result, u32 auid, u32 sid) return; audit_log_format(audit_buf, "op=SAD-add res=%u",result); xfrm_audit_common_stateinfo(x, audit_buf); - audit_log_format(audit_buf, "spi=%lu(0x%lx)", -(unsigned long)x->id.spi, (unsigned long)x->id.spi); + spi = ntohl(x->id.spi); + audit_log_format(audit_buf, "spi=%u(0x%x)", spi, spi); audit_log_end(audit_buf); } EXPORT_SYMBOL_GPL(xfrm_audit_state_add); @@ -2047,6 +2048,7 @@ void xfrm_audit_state_delete(struct xfrm_state *x, int result, u32 auid, u32 sid) { struct audit_buffer *audit_buf; + u32 spi; extern int audit_enabled; if (audit_enabled == 0) @@ -2056,8 +2058,8 @@ xfrm_audit_state_delete(struct xfrm_state *x, int result, u32 auid, u32 sid) return; audit_log_format(audit_buf, "op=SAD-delete res=%u",result); xfrm_audit_common_stateinfo(x, audit_buf); - audit_log_format(audit_buf, "spi=%lu(0x%lx)", -(unsigned long)x->id.spi, (unsigned long)x->id.spi); + spi = ntohl(x->id.spi); + audit_log_format(audit_buf, "spi=%u(0x%x)", spi, spi); audit_log_end(audit_buf); } EXPORT_SYMBOL_GPL(xfrm_audit_state_delete); -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at
http://vger.kernel.org/majordomo-info.html
Re: [PATCH][NETDEV]: remove netif_running() check from myri10ge_poll()
From: Andrew Gallatin [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 13:38:34 -0500 Remove the bogus netif_running() check from myri10ge_poll(). This eliminates any chance that myri10ge_poll() can trigger an oops by calling netif_rx_complete() and returning with work_done == budget. Signed-off-by: Andrew Gallatin [EMAIL PROTECTED] Acked-by: David S. Miller [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] IPIP: Allow rebinding the tunnel to another interface
From: Michal Schmidt [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 17:29:33 +0100 On Wed, 12 Dec 2007 17:00:14 +0100 Patrick McHardy [EMAIL PROTECTED] wrote: If you change dev-iflink this should trigger a rtnetlink notification. OK, I've added netdev_state_change(dev). Here's the new patch. Applied to net-2.6.25, thanks. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Remove unused define from loopback driver
From: Pavel Emelyanov [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 17:44:54 +0300 The LOOPBACK_OVERHEAD is not used in this file at all. Signed-off-by: Pavel Emelyanov [EMAIL PROTECTED] Applied to net-2.6.25, thanks. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2.6.25] IPv4: thresholds in fib_trie.c are not modified, make them const
From: Denis V. Lunev [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 14:05:55 +0300 -static int halve_threshold = 25; -static int inflate_threshold = 50; -static int halve_threshold_root = 15; -static int inflate_threshold_root = 25; halve_threshold_root and inflate_threshold_root have different values in the net-2.6.25 tree. What tree are you patching against? These were changed way back in: commit 965ffea43d4ebe8cd7b9fee78d651268dd7d23c5 Author: Robert Olsson [EMAIL PROTECTED] Date: Mon Mar 19 16:29:58 2007 -0700 [IPV4]: fib_trie root node settings The threshold for root node can be more aggressive set to get better tree compression. The new setting mekes the root grow from 16 to 19 bits and substansial improvemnt in Aver depth this with the current table of 214393 prefixes But really the dynamic resize should need more investigation both in terms convergence and performance and maybe it should be possible to change... Maybe just for the brave to start with or we may have to back this out. Please don't waste my time like this. Always submit patches against the current tree, even if you think the files you are patching haven't changed. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/8][BNX2]: Add ring constants.
From: Michael Chan [EMAIL PROTECTED] Date: Wed, 12 Dec 2007 10:58:22 -0800 [BNX2]: Add ring constants. Define the various ring constants to make the code cleaner. Signed-off-by: Michael Chan [EMAIL PROTECTED] Applied to net-2.6.25 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html