On Sat, Aug 06, 2005 at 05:22:23PM +1000, Daniel Phillips wrote: > Daniel > > diff -up --recursive 2.6.12.3.clean/include/linux/gfp.h > 2.6.12.3/include/linux/gfp.h > --- 2.6.12.3.clean/include/linux/gfp.h 2005-07-15 17:18:57.000000000 > -0400 > +++ 2.6.12.3/include/linux/gfp.h 2005-08-05 21:53:09.000000000 -0400 > @@ -39,6 +39,7 @@ struct vm_area_struct; > #define __GFP_COMP 0x4000u /* Add compound page metadata */ > #define __GFP_ZERO 0x8000u /* Return zeroed page on success */ > #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */ > +#define __GFP_MEMALLOC 0x20000u /* Use emergency reserves */ > > #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ > #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) > diff -up --recursive 2.6.12.3.clean/include/linux/netdevice.h > 2.6.12.3/include/linux/netdevice.h > --- 2.6.12.3.clean/include/linux/netdevice.h 2005-07-15 17:18:57.000000000 > -0400 > +++ 2.6.12.3/include/linux/netdevice.h 2005-08-06 01:06:18.000000000 > -0400 > @@ -371,6 +371,8 @@ struct net_device > struct Qdisc *qdisc_ingress; > struct list_head qdisc_list; > unsigned long tx_queue_len; /* Max frames per queue allowed > */ > + int rx_reserve; > + int rx_reserve_used; > > /* ingress path synchronizer */ > spinlock_t ingress_lock; > @@ -929,6 +931,28 @@ extern void net_disable_timestamp(void) > extern char *net_sysctl_strdup(const char *s); > #endif > > +static inline struct sk_buff *__dev_memalloc_skb(struct net_device *dev, > + unsigned length, int gfp_mask) > +{ > + struct sk_buff *skb = __dev_alloc_skb(length, gfp_mask); > + if (skb) > + goto done; > + if (dev->rx_reserve_used >= dev->rx_reserve) > + return NULL; > + if (!__dev_alloc_skb(length, gfp_mask|__GFP_MEMALLOC)) > + return NULL;; > + dev->rx_reserve_used++;
why bother with rx_reserve at all? Why not just let the second allocation fail, without the rx_reserve_used test? Additionally, I think the rx_reserve_used accounting is wrong, since I could simply free the skb -- but doing so would cause a rx_reserve_used leak in your code, since you only decrement the counter in the TCP IPv4 path. > +done: > + skb->dev = dev; > + return skb; > +} > + > +static inline struct sk_buff *dev_alloc_skb_reserve(struct net_device *dev, > + unsigned length) > +{ > + return __dev_memalloc_skb(dev, length, GFP_ATOMIC); > +} unused function > + > #endif /* __KERNEL__ */ > > #endif /* _LINUX_DEV_H */ > diff -up --recursive 2.6.12.3.clean/include/net/sock.h > 2.6.12.3/include/net/sock.h > --- 2.6.12.3.clean/include/net/sock.h 2005-07-15 17:18:57.000000000 -0400 > +++ 2.6.12.3/include/net/sock.h 2005-08-05 21:53:09.000000000 -0400 > @@ -382,6 +382,7 @@ enum sock_flags { > SOCK_NO_LARGESEND, /* whether to sent large segments or not */ > SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ > SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ > + SOCK_MEMALLOC, /* protocol can use memalloc reserve */ > }; > > static inline void sock_set_flag(struct sock *sk, enum sock_flags flag) > @@ -399,6 +400,11 @@ static inline int sock_flag(struct sock > return test_bit(flag, &sk->sk_flags); > } > > +static inline int is_memalloc_sock(struct sock *sk) > +{ > + return sock_flag(sk, SOCK_MEMALLOC); > +} > + > static inline void sk_acceptq_removed(struct sock *sk) > { > sk->sk_ack_backlog--; > diff -up --recursive 2.6.12.3.clean/mm/page_alloc.c 2.6.12.3/mm/page_alloc.c > --- 2.6.12.3.clean/mm/page_alloc.c 2005-07-15 17:18:57.000000000 -0400 > +++ 2.6.12.3/mm/page_alloc.c 2005-08-05 21:53:09.000000000 -0400 > @@ -802,8 +802,8 @@ __alloc_pages(unsigned int __nocast gfp_ > > /* This allocation should allow future memory freeing. 
*/ > > - if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) > - && !in_interrupt()) { > + if ((((p->flags & PF_MEMALLOC) || > unlikely(test_thread_flag(TIF_MEMDIE))) > + && !in_interrupt()) || (gfp_mask & __GFP_MEMALLOC)) { > if (!(gfp_mask & __GFP_NOMEMALLOC)) { > /* go through the zonelist yet again, ignoring mins */ > for (i = 0; (z = zones[i]) != NULL; i++) { > diff -up --recursive 2.6.12.3.clean/net/ethernet/eth.c > 2.6.12.3/net/ethernet/eth.c > --- 2.6.12.3.clean/net/ethernet/eth.c 2005-07-15 17:18:57.000000000 -0400 > +++ 2.6.12.3/net/ethernet/eth.c 2005-08-06 02:32:02.000000000 -0400 > @@ -281,6 +281,7 @@ void ether_setup(struct net_device *dev) > dev->mtu = 1500; /* eth_mtu */ > dev->addr_len = ETH_ALEN; > dev->tx_queue_len = 1000; /* Ethernet wants good queues */ > + dev->rx_reserve = 50; > dev->flags = IFF_BROADCAST|IFF_MULTICAST; > > memset(dev->broadcast,0xFF, ETH_ALEN); > diff -up --recursive 2.6.12.3.clean/net/ipv4/tcp_ipv4.c > 2.6.12.3/net/ipv4/tcp_ipv4.c > --- 2.6.12.3.clean/net/ipv4/tcp_ipv4.c 2005-07-15 17:18:57.000000000 > -0400 > +++ 2.6.12.3/net/ipv4/tcp_ipv4.c 2005-08-06 00:45:07.000000000 -0400 > @@ -1766,6 +1766,12 @@ int tcp_v4_rcv(struct sk_buff *skb) > if (!sk) > goto no_tcp_socket; > > + if (skb->dev->rx_reserve_used) { > + skb->dev->rx_reserve_used--; // racy if it's racy, use atomic_t or somesuch :) Jeff - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html