Hi,

This patch fills in some missing pieces:

   * Support v4 udp: same as v4 tcp, when in reserve, drop packets on
     noncritical sockets

   * Support v4 icmp: when in reserve, drop icmp traffic

   * Add reserve skb support to e1000 driver

   * API for dropping packets before delivery (dev_drop_skb)

   * Use atomic_t for reserve accounting

Now ready for proof-of-concept testing.  High level API boilerplate will come
later.

Regards,

Daniel

diff -up --recursive 2.6.12.3.clean/drivers/net/e1000/e1000_main.c 
2.6.12.3/drivers/net/e1000/e1000_main.c
--- 2.6.12.3.clean/drivers/net/e1000/e1000_main.c       2005-07-15 17:18:57.000000000 -0400
+++ 2.6.12.3/drivers/net/e1000/e1000_main.c     2005-08-06 16:46:13.000000000 -0400
@@ -3242,7 +3242,7 @@ e1000_alloc_rx_buffers_ps(struct e1000_a
                                cpu_to_le64(ps_page_dma->ps_page_dma[j]);
                }
 
-               skb = dev_alloc_skb(adapter->rx_ps_bsize0 + NET_IP_ALIGN);
+               skb = dev_memalloc_skb(netdev, adapter->rx_ps_bsize0 + NET_IP_ALIGN);
 
                if(unlikely(!skb))
                        break;
@@ -3253,8 +3253,6 @@ e1000_alloc_rx_buffers_ps(struct e1000_a
                 */
                skb_reserve(skb, NET_IP_ALIGN);
 
-               skb->dev = netdev;
-
                buffer_info->skb = skb;
                buffer_info->length = adapter->rx_ps_bsize0;
                buffer_info->dma = pci_map_single(pdev, skb->data,
diff -up --recursive 2.6.12.3.clean/include/linux/gfp.h 
2.6.12.3/include/linux/gfp.h
--- 2.6.12.3.clean/include/linux/gfp.h  2005-07-15 17:18:57.000000000 -0400
+++ 2.6.12.3/include/linux/gfp.h        2005-08-05 21:53:09.000000000 -0400
@@ -39,6 +39,7 @@ struct vm_area_struct;
 #define __GFP_COMP     0x4000u /* Add compound page metadata */
 #define __GFP_ZERO     0x8000u /* Return zeroed page on success */
 #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
+#define __GFP_MEMALLOC  0x20000u /* Use emergency reserves */
 
 #define __GFP_BITS_SHIFT 20    /* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
diff -up --recursive 2.6.12.3.clean/include/linux/netdevice.h 
2.6.12.3/include/linux/netdevice.h
--- 2.6.12.3.clean/include/linux/netdevice.h    2005-07-15 17:18:57.000000000 -0400
+++ 2.6.12.3/include/linux/netdevice.h  2005-08-06 16:37:14.000000000 -0400
@@ -371,6 +371,8 @@ struct net_device
        struct Qdisc            *qdisc_ingress;
        struct list_head        qdisc_list;
        unsigned long           tx_queue_len;   /* Max frames per queue allowed */
+       int                     rx_reserve;
+       atomic_t                rx_reserve_used;
 
        /* ingress path synchronizer */
        spinlock_t              ingress_lock;
@@ -662,6 +664,49 @@ static inline void dev_kfree_skb_any(str
                dev_kfree_skb(skb);
 }
 
+/*
+ * Support for critical network IO under low memory conditions
+ */
+static inline int dev_reserve_used(struct net_device *dev)
+{
+       return atomic_read(&dev->rx_reserve_used);
+}
+
+static inline struct sk_buff *__dev_memalloc_skb(struct net_device *dev,
+       unsigned length, int gfp_mask)
+{
+       struct sk_buff *skb = __dev_alloc_skb(length, gfp_mask);
+       if (skb)
+               goto done;
+       if (dev_reserve_used(dev) >= dev->rx_reserve)
+               return NULL;
+       if (!(skb = __dev_alloc_skb(length, gfp_mask|__GFP_MEMALLOC)))
+               return NULL;
+       atomic_inc(&dev->rx_reserve_used);
+done:
+       skb->dev = dev;
+       return skb;
+}
+
+static inline struct sk_buff *dev_memalloc_skb(struct net_device *dev,
+       unsigned length)
+{
+       return __dev_memalloc_skb(dev, length, GFP_ATOMIC);
+}
+
+static inline void dev_unreserve(struct net_device *dev)
+{
+       if (atomic_dec_return(&dev->rx_reserve_used) < 0)
+               atomic_inc(&dev->rx_reserve_used);
+}
+
+static inline void dev_drop_skb(struct sk_buff *skb)
+{
+       struct net_device *dev = skb->dev;
+       __kfree_skb(skb);
+       dev_unreserve(dev);
+}
+
 #define HAVE_NETIF_RX 1
 extern int             netif_rx(struct sk_buff *skb);
 extern int             netif_rx_ni(struct sk_buff *skb);
diff -up --recursive 2.6.12.3.clean/include/net/sock.h 
2.6.12.3/include/net/sock.h
--- 2.6.12.3.clean/include/net/sock.h   2005-07-15 17:18:57.000000000 -0400
+++ 2.6.12.3/include/net/sock.h 2005-08-05 21:53:09.000000000 -0400
@@ -382,6 +382,7 @@ enum sock_flags {
        SOCK_NO_LARGESEND, /* whether to sent large segments or not */
        SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
        SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
+       SOCK_MEMALLOC, /* protocol can use memalloc reserve */
 };
 
 static inline void sock_set_flag(struct sock *sk, enum sock_flags flag)
@@ -399,6 +400,11 @@ static inline int sock_flag(struct sock 
        return test_bit(flag, &sk->sk_flags);
 }
 
+static inline int is_memalloc_sock(struct sock *sk)
+{
+       return sock_flag(sk, SOCK_MEMALLOC);
+}
+
 static inline void sk_acceptq_removed(struct sock *sk)
 {
        sk->sk_ack_backlog--;
diff -up --recursive 2.6.12.3.clean/mm/page_alloc.c 2.6.12.3/mm/page_alloc.c
--- 2.6.12.3.clean/mm/page_alloc.c      2005-07-15 17:18:57.000000000 -0400
+++ 2.6.12.3/mm/page_alloc.c    2005-08-05 21:53:09.000000000 -0400
@@ -802,8 +802,8 @@ __alloc_pages(unsigned int __nocast gfp_
 
        /* This allocation should allow future memory freeing. */
 
-       if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
-                       && !in_interrupt()) {
+       if ((((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
+                       && !in_interrupt()) || (gfp_mask & __GFP_MEMALLOC)) {
                if (!(gfp_mask & __GFP_NOMEMALLOC)) {
                        /* go through the zonelist yet again, ignoring mins */
                        for (i = 0; (z = zones[i]) != NULL; i++) {
diff -up --recursive 2.6.12.3.clean/net/ethernet/eth.c 
2.6.12.3/net/ethernet/eth.c
--- 2.6.12.3.clean/net/ethernet/eth.c   2005-07-15 17:18:57.000000000 -0400
+++ 2.6.12.3/net/ethernet/eth.c 2005-08-06 02:32:02.000000000 -0400
@@ -281,6 +281,7 @@ void ether_setup(struct net_device *dev)
        dev->mtu                = 1500; /* eth_mtu */
        dev->addr_len           = ETH_ALEN;
        dev->tx_queue_len       = 1000; /* Ethernet wants good queues */        
+       dev->rx_reserve         = 50;
        dev->flags              = IFF_BROADCAST|IFF_MULTICAST;
        
        memset(dev->broadcast,0xFF, ETH_ALEN);
diff -up --recursive 2.6.12.3.clean/net/ipv4/icmp.c 2.6.12.3/net/ipv4/icmp.c
--- 2.6.12.3.clean/net/ipv4/icmp.c      2005-07-15 17:18:57.000000000 -0400
+++ 2.6.12.3/net/ipv4/icmp.c    2005-08-06 16:58:17.000000000 -0400
@@ -944,6 +944,11 @@ int icmp_rcv(struct sk_buff *skb)
        default:;
        }
 
+       if (dev_reserve_used(skb->dev)) {
+               dev_unreserve(skb->dev);
+               goto drop;
+       }
+
        if (!pskb_pull(skb, sizeof(struct icmphdr)))
                goto error;
 
diff -up --recursive 2.6.12.3.clean/net/ipv4/tcp_ipv4.c 
2.6.12.3/net/ipv4/tcp_ipv4.c
--- 2.6.12.3.clean/net/ipv4/tcp_ipv4.c  2005-07-15 17:18:57.000000000 -0400
+++ 2.6.12.3/net/ipv4/tcp_ipv4.c        2005-08-06 16:59:15.000000000 -0400
@@ -1766,6 +1766,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
        if (!sk)
                goto no_tcp_socket;
 
+       if (unlikely(dev_reserve_used(skb->dev))) {
+               dev_unreserve(skb->dev);
+               if (!is_memalloc_sock(sk))
+                       goto discard_and_relse;
+       }
+
 process:
        if (sk->sk_state == TCP_TIME_WAIT)
                goto do_time_wait;
diff -up --recursive 2.6.12.3.clean/net/ipv4/udp.c 2.6.12.3/net/ipv4/udp.c
--- 2.6.12.3.clean/net/ipv4/udp.c       2005-07-15 17:18:57.000000000 -0400
+++ 2.6.12.3/net/ipv4/udp.c     2005-08-06 17:12:20.000000000 -0400
@@ -1152,6 +1152,16 @@ int udp_rcv(struct sk_buff *skb)
        sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
 
        if (sk != NULL) {
-               int ret = udp_queue_rcv_skb(sk, skb);
+               int ret;
+
+               if (unlikely(dev_reserve_used(skb->dev))) {
+                       dev_unreserve(skb->dev);
+                       if (!is_memalloc_sock(sk)) {
+                               sock_put(sk);
+                               goto drop_noncritical;
+                       }
+               }
+
+               ret = udp_queue_rcv_skb(sk, skb);
                sock_put(sk);
 
@@ -1163,6 +1169,7 @@ int udp_rcv(struct sk_buff *skb)
                return 0;
        }
 
+drop_noncritical:
        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
                goto drop;
 
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to