On Sat, Aug 06, 2005 at 05:22:23PM +1000, Daniel Phillips wrote:
> Daniel
> 
> diff -up --recursive 2.6.12.3.clean/include/linux/gfp.h 
> 2.6.12.3/include/linux/gfp.h
> --- 2.6.12.3.clean/include/linux/gfp.h        2005-07-15 17:18:57.000000000 
> -0400
> +++ 2.6.12.3/include/linux/gfp.h      2005-08-05 21:53:09.000000000 -0400
> @@ -39,6 +39,7 @@ struct vm_area_struct;
>  #define __GFP_COMP   0x4000u /* Add compound page metadata */
>  #define __GFP_ZERO   0x8000u /* Return zeroed page on success */
>  #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
> +#define __GFP_MEMALLOC  0x20000u /* Use emergency reserves */
>  
>  #define __GFP_BITS_SHIFT 20  /* Room for 20 __GFP_FOO bits */
>  #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
> diff -up --recursive 2.6.12.3.clean/include/linux/netdevice.h 
> 2.6.12.3/include/linux/netdevice.h
> --- 2.6.12.3.clean/include/linux/netdevice.h  2005-07-15 17:18:57.000000000 
> -0400
> +++ 2.6.12.3/include/linux/netdevice.h        2005-08-06 01:06:18.000000000 
> -0400
> @@ -371,6 +371,8 @@ struct net_device
>       struct Qdisc            *qdisc_ingress;
>       struct list_head        qdisc_list;
>       unsigned long           tx_queue_len;   /* Max frames per queue allowed 
> */
> +     int                     rx_reserve;
> +     int                     rx_reserve_used;
>  
>       /* ingress path synchronizer */
>       spinlock_t              ingress_lock;
> @@ -929,6 +931,28 @@ extern void              net_disable_timestamp(void)
>  extern char *net_sysctl_strdup(const char *s);
>  #endif
>  
> +static inline struct sk_buff *__dev_memalloc_skb(struct net_device *dev,
> +     unsigned length, int gfp_mask)
> +{
> +     struct sk_buff *skb = __dev_alloc_skb(length, gfp_mask);
> +     if (skb)
> +             goto done;
> +     if (dev->rx_reserve_used >= dev->rx_reserve)
> +             return NULL;
> +     if (!__dev_alloc_skb(length, gfp_mask|__GFP_MEMALLOC))
> +             return NULL;;
> +     dev->rx_reserve_used++;

Why bother with rx_reserve at all?  Why not just let the second
allocation fail, without the rx_reserve_used test?

Additionally, I think the rx_reserve_used accounting is wrong, since I
could simply free the skb -- but doing so would cause an rx_reserve_used
leak in your code, since you only decrement the counter in the TCP IPv4
path.


> +done:
> +     skb->dev = dev;
> +     return skb;
> +}
> +
> +static inline struct sk_buff *dev_alloc_skb_reserve(struct net_device *dev,
> +     unsigned length)
> +{
> +     return __dev_memalloc_skb(dev, length, GFP_ATOMIC);
> +}

unused function


> +
>  #endif /* __KERNEL__ */
>  
>  #endif       /* _LINUX_DEV_H */
> diff -up --recursive 2.6.12.3.clean/include/net/sock.h 
> 2.6.12.3/include/net/sock.h
> --- 2.6.12.3.clean/include/net/sock.h 2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/include/net/sock.h       2005-08-05 21:53:09.000000000 -0400
> @@ -382,6 +382,7 @@ enum sock_flags {
>       SOCK_NO_LARGESEND, /* whether to sent large segments or not */
>       SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
>       SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
> +     SOCK_MEMALLOC, /* protocol can use memalloc reserve */
>  };
>  
>  static inline void sock_set_flag(struct sock *sk, enum sock_flags flag)
> @@ -399,6 +400,11 @@ static inline int sock_flag(struct sock 
>       return test_bit(flag, &sk->sk_flags);
>  }
>  
> +static inline int is_memalloc_sock(struct sock *sk)
> +{
> +     return sock_flag(sk, SOCK_MEMALLOC);
> +}
> +
>  static inline void sk_acceptq_removed(struct sock *sk)
>  {
>       sk->sk_ack_backlog--;
> diff -up --recursive 2.6.12.3.clean/mm/page_alloc.c 2.6.12.3/mm/page_alloc.c
> --- 2.6.12.3.clean/mm/page_alloc.c    2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/mm/page_alloc.c  2005-08-05 21:53:09.000000000 -0400
> @@ -802,8 +802,8 @@ __alloc_pages(unsigned int __nocast gfp_
>  
>       /* This allocation should allow future memory freeing. */
>  
> -     if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
> -                     && !in_interrupt()) {
> +     if ((((p->flags & PF_MEMALLOC) || 
> unlikely(test_thread_flag(TIF_MEMDIE)))
> +                     && !in_interrupt()) || (gfp_mask & __GFP_MEMALLOC)) {
>               if (!(gfp_mask & __GFP_NOMEMALLOC)) {
>                       /* go through the zonelist yet again, ignoring mins */
>                       for (i = 0; (z = zones[i]) != NULL; i++) {
> diff -up --recursive 2.6.12.3.clean/net/ethernet/eth.c 
> 2.6.12.3/net/ethernet/eth.c
> --- 2.6.12.3.clean/net/ethernet/eth.c 2005-07-15 17:18:57.000000000 -0400
> +++ 2.6.12.3/net/ethernet/eth.c       2005-08-06 02:32:02.000000000 -0400
> @@ -281,6 +281,7 @@ void ether_setup(struct net_device *dev)
>       dev->mtu                = 1500; /* eth_mtu */
>       dev->addr_len           = ETH_ALEN;
>       dev->tx_queue_len       = 1000; /* Ethernet wants good queues */        
> +     dev->rx_reserve         = 50;
>       dev->flags              = IFF_BROADCAST|IFF_MULTICAST;
>       
>       memset(dev->broadcast,0xFF, ETH_ALEN);
> diff -up --recursive 2.6.12.3.clean/net/ipv4/tcp_ipv4.c 
> 2.6.12.3/net/ipv4/tcp_ipv4.c
> --- 2.6.12.3.clean/net/ipv4/tcp_ipv4.c        2005-07-15 17:18:57.000000000 
> -0400
> +++ 2.6.12.3/net/ipv4/tcp_ipv4.c      2005-08-06 00:45:07.000000000 -0400
> @@ -1766,6 +1766,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
>       if (!sk)
>               goto no_tcp_socket;
>  
> +     if (skb->dev->rx_reserve_used) {
> +             skb->dev->rx_reserve_used--; // racy

If it's racy, use atomic_t or somesuch :)

        Jeff



-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to