[PATCH 3/4] UDP memory accounting and limitation(take 5): memory accounting

2007-10-12 Thread Satoshi OSHIMA
This patch introduces memory usage accounting for UDP.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-udp_limit/net/ipv4/ip_output.c
===
--- 2.6.23-udp_limit.orig/net/ipv4/ip_output.c
+++ 2.6.23-udp_limit/net/ipv4/ip_output.c
@@ -743,6 +743,8 @@ static inline int ip_ufo_append_data(str
/* specify the length of each IP datagram fragment*/
skb_shinfo(skb)-gso_size = mtu - fragheaderlen;
skb_shinfo(skb)-gso_type = SKB_GSO_UDP;
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
__skb_queue_tail(sk-sk_write_queue, skb);
 
return 0;
@@ -924,6 +926,9 @@ alloc_new_skb:
}
if (skb == NULL)
goto error;
+   if (sk-sk_prot-memory_allocated)
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
 
/*
 *  Fill in the control structures
@@ -1023,6 +1028,8 @@ alloc_new_skb:
frag = skb_shinfo(skb)-frags[i];
skb-truesize += PAGE_SIZE;
atomic_add(PAGE_SIZE, sk-sk_wmem_alloc);
+   if (sk-sk_prot-memory_allocated)
+   
atomic_inc(sk-sk_prot-memory_allocated);
} else {
err = -EMSGSIZE;
goto error;
@@ -1123,7 +1130,9 @@ ssize_t   ip_append_page(struct sock *sk, 
if (unlikely(!skb)) {
err = -ENOBUFS;
goto error;
-   }
+   } else if (sk-sk_prot-memory_allocated)
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
 
/*
 *  Fill in the control structures
@@ -1202,13 +1211,14 @@ int ip_push_pending_frames(struct sock *
struct iphdr *iph;
__be16 df = 0;
__u8 ttl;
-   int err = 0;
+   int err = 0, send_page_size;
 
if ((skb = __skb_dequeue(sk-sk_write_queue)) == NULL)
goto out;
tail_skb = (skb_shinfo(skb)-frag_list);
 
/* move skb-data to ip header from ext header */
+   send_page_size = sk_datagram_pages(skb-truesize);
if (skb-data  skb_network_header(skb))
__skb_pull(skb, skb_network_offset(skb));
while ((tmp_skb = __skb_dequeue(sk-sk_write_queue)) != NULL) {
@@ -1218,6 +1228,7 @@ int ip_push_pending_frames(struct sock *
skb-len += tmp_skb-len;
skb-data_len += tmp_skb-len;
skb-truesize += tmp_skb-truesize;
+   send_page_size += sk_datagram_pages(tmp_skb-truesize);
__sock_put(tmp_skb-sk);
tmp_skb-destructor = NULL;
tmp_skb-sk = NULL;
@@ -1269,6 +1280,8 @@ int ip_push_pending_frames(struct sock *
/* Netfilter gets whole the not fragmented skb. */
err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
  skb-dst-dev, dst_output);
+   if (sk-sk_prot-memory_allocated)
+   atomic_sub(send_page_size, sk-sk_prot-memory_allocated);
if (err) {
if (err  0)
err = inet-recverr ? net_xmit_errno(err) : 0;
@@ -1298,9 +1311,15 @@ void ip_flush_pending_frames(struct sock
 {
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
+   int num_flush_mem = 0;
 
-   while ((skb = __skb_dequeue_tail(sk-sk_write_queue)) != NULL)
+   while ((skb = __skb_dequeue_tail(sk-sk_write_queue)) != NULL) {
+   num_flush_mem += sk_datagram_pages(skb-truesize);
kfree_skb(skb);
+   }
+
+   if (sk-sk_prot-memory_allocated)
+   atomic_sub(num_flush_mem, sk-sk_prot-memory_allocated);
 
inet-cork.flags = ~IPCORK_OPT;
kfree(inet-cork.opt);
Index: 2.6.23-udp_limit/net/ipv4/udp.c
===
--- 2.6.23-udp_limit.orig/net/ipv4/udp.c
+++ 2.6.23-udp_limit/net/ipv4/udp.c
@@ -885,6 +885,9 @@ try_again:
err = ulen;
 
 out_free:
+   atomic_sub(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
+
skb_free_datagram(sk, skb);
 out:
return err;
@@ -892,6 +895,9 @@ out:
 csum_copy_err:
UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
 
+   atomic_sub(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated

[PATCH 2/4] UDP memory accounting and limitation(take 5): accounting unit and variable

2007-10-12 Thread Satoshi OSHIMA
This patch introduces global variable for UDP memory accounting.
The unit is page.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-udp_limit/include/net/sock.h
===
--- 2.6.23-udp_limit.orig/include/net/sock.h
+++ 2.6.23-udp_limit/include/net/sock.h
@@ -723,6 +723,13 @@ static inline int sk_stream_wmem_schedul
   sk_stream_mem_schedule(sk, size, 0);
 }
 
+#define SK_DATAGRAM_MEM_QUANTUM ((int)PAGE_SIZE)
+
+static inline int sk_datagram_pages(int amt)
+{
+   return DIV_ROUND_UP(amt, SK_DATAGRAM_MEM_QUANTUM);
+}
+
 /* Used by processes to lock a socket state, so that
  * interrupts and bottom half handlers won't change it
  * from under us. It essentially blocks any incoming
Index: 2.6.23-udp_limit/include/net/udp.h
===
--- 2.6.23-udp_limit.orig/include/net/udp.h
+++ 2.6.23-udp_limit/include/net/udp.h
@@ -65,6 +65,8 @@ extern rwlock_t udp_hash_lock;
 
 extern struct proto udp_prot;
 
+extern atomic_t udp_memory_allocated;
+
 struct sk_buff;
 
 /*
Index: 2.6.23-udp_limit/net/ipv4/proc.c
===
--- 2.6.23-udp_limit.orig/net/ipv4/proc.c
+++ 2.6.23-udp_limit/net/ipv4/proc.c
@@ -66,7 +66,8 @@ static int sockstat_seq_show(struct seq_
   fold_prot_inuse(tcp_prot), atomic_read(tcp_orphan_count),
   tcp_death_row.tw_count, atomic_read(tcp_sockets_allocated),
   atomic_read(tcp_memory_allocated));
-   seq_printf(seq, UDP: inuse %d\n, fold_prot_inuse(udp_prot));
+   seq_printf(seq, UDP: inuse %d mem %d\n, fold_prot_inuse(udp_prot),
+  atomic_read(udp_memory_allocated));
seq_printf(seq, UDPLITE: inuse %d\n, fold_prot_inuse(udplite_prot));
seq_printf(seq, RAW: inuse %d\n, fold_prot_inuse(raw_prot));
seq_printf(seq,  FRAG: inuse %d memory %d\n, ip_frag_nqueues,
Index: 2.6.23-udp_limit/net/ipv4/udp.c
===
--- 2.6.23-udp_limit.orig/net/ipv4/udp.c
+++ 2.6.23-udp_limit/net/ipv4/udp.c
@@ -113,6 +113,8 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_sta
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
+atomic_t udp_memory_allocated;
+
 static int udp_port_rover;
 
 static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head 
udptable[])

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4]UDP memory accounting and limitation(take 5)

2007-10-12 Thread Satoshi OSHIMA
Hi,

I revised a patch set of UDP memory accounting and
limitation.

This patch set is for kernel 2.6.23. The differences
from take 4 are

* removing unnecessary EXPORT_SYMBOLs
* adding minimal limit of /proc/sys/net/udp_mem
* bugfix of UDP limit affecting protocol other
  than UDP
* introducing __ip_check_max_skb_pages()
* using CTL_UNNUMBERED
* adding udp_mem usage to Documentation/networking/ip_sysctl.txt

How to use UDP memory limitation:

This patch set adds

/proc/sys/net/ipv4/udp_mem

as a tuning parameter. 

When you give the number that is greater than 4096,
UDP memory limitation will work. The number of pages
for socket buffer is limited up to udp_mem[pages].

Currently this function drops a packet when
it is sent or received and the number of pages used for
socket buffers exceeds the limit. It does not reclaim
buffers that are already allocated.

On the other hand, when udp_mem is set to 4096,
UDP memory limitation is disabled.
The default value of udp_mem is 4096.

Comment, review and test are welcome.

Thanks,

Satoshi Oshima
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC/PATCH 4/4] UDP memory usage accounting (take 4): memory limitation

2007-10-12 Thread Satoshi OSHIMA
Hi Stephen,

 On Thu, 11 Oct 2007 21:51:14 +0900
 Satoshi OSHIMA [EMAIL PROTECTED] wrote:
 
 Hi Stephen,

 Thank you for your comment.

{
 +  .ctl_name   = NET_UDP_MEM,
 +  .procname   = udp_mem,
 +  .data   = sysctl_udp_mem,
 +  .maxlen = sizeof(sysctl_udp_mem),
 +  .mode   = 0644,
 +  .proc_handler   = proc_dointvec
 +  },
 +  {
.ctl_name   = NET_TCP_APP_WIN,
.procname   = tcp_app_win,
.data   = sysctl_tcp_app_win,
 if you use proc_dointvec_minmax, then you could inforce min/max
 values for udp_mem for the sysctl
 
 One other comment. Sysctl value indexes are deprecated at this point
 so all new values should use CTL_UNNUMBERED.  Therefore unless NET_UDP_MEM
 already exists, please don't add it.

Thank you for letting me know. I will fix it.

Satoshi Oshima

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] UDP memory accounting and limitation(take 5): fix send buffer check

2007-10-12 Thread Satoshi OSHIMA
This patch introduces a sndbuf size check before
memory allocation for the send buffer.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc7-udp_limit/net/ipv4/ip_output.c
===
--- 2.6.23-rc7-udp_limit.orig/net/ipv4/ip_output.c
+++ 2.6.23-rc7-udp_limit/net/ipv4/ip_output.c
@@ -1004,6 +1004,11 @@ alloc_new_skb:
frag = skb_shinfo(skb)-frags[i];
}
} else if (i  MAX_SKB_FRAGS) {
+   if (atomic_read(sk-sk_wmem_alloc) + PAGE_SIZE
+2 * sk-sk_sndbuf) {
+   err = -ENOBUFS;
+   goto error;
+   }
if (copy  PAGE_SIZE)
copy = PAGE_SIZE;
page = alloc_pages(sk-sk_allocation, 0);

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] UDP memory accounting and limitation(take 5): memory limitation

2007-10-12 Thread Satoshi OSHIMA
This patch introduces memory limitation for UDP.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-udp_limit/include/net/udp.h
===
--- 2.6.23-udp_limit.orig/include/net/udp.h
+++ 2.6.23-udp_limit/include/net/udp.h
@@ -65,7 +65,10 @@ extern rwlock_t udp_hash_lock;
 
 extern struct proto udp_prot;
 
+/* Used by memory accounting and capping */
+#define UDP_MIN_SKB_PAGES  4096
 extern atomic_t udp_memory_allocated;
+extern int sysctl_udp_mem;
 
 struct sk_buff;
 
Index: 2.6.23-udp_limit/net/ipv4/udp.c
===
--- 2.6.23-udp_limit.orig/net/ipv4/udp.c
+++ 2.6.23-udp_limit/net/ipv4/udp.c
@@ -114,6 +114,7 @@ struct hlist_head udp_hash[UDP_HTABLE_SI
 DEFINE_RWLOCK(udp_hash_lock);
 
 atomic_t udp_memory_allocated;
+int sysctl_udp_mem = UDP_MIN_SKB_PAGES;
 
 static int udp_port_rover;
 
@@ -1016,6 +1017,16 @@ int udp_queue_rcv_skb(struct sock * sk, 
goto drop;
}
 
+   if (sk-sk_prot-sysctl_mem[0]  UDP_MIN_SKB_PAGES) {
+   if ((atomic_read(sk-sk_prot-memory_allocated)
+  + sk_datagram_pages(skb-truesize))
+   = sk-sk_prot-sysctl_mem[0]) {
+   UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS,
+   up-pcflag);
+   goto drop;
+   }
+   }
+
if ((rc = sock_queue_rcv_skb(sk,skb))  0) {
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
@@ -1451,6 +1462,7 @@ struct proto udp_prot = {
.unhash= udp_lib_unhash,
.get_port  = udp_v4_get_port,
.memory_allocated  = udp_memory_allocated,
+   .sysctl_mem= sysctl_udp_mem,
.obj_size  = sizeof(struct udp_sock),
 #ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
Index: 2.6.23-udp_limit/net/ipv4/sysctl_net_ipv4.c
===
--- 2.6.23-udp_limit.orig/net/ipv4/sysctl_net_ipv4.c
+++ 2.6.23-udp_limit/net/ipv4/sysctl_net_ipv4.c
@@ -17,6 +17,7 @@
 #include net/ip.h
 #include net/route.h
 #include net/tcp.h
+#include net/udp.h
 #include net/cipso_ipv4.h
 
 /* From af_inet.c */
@@ -25,6 +26,7 @@ extern int sysctl_ip_nonlocal_bind;
 #ifdef CONFIG_SYSCTL
 static int zero;
 static int tcp_retr1_max = 255;
+static int udp_mem_min = UDP_MIN_SKB_PAGES;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
 #endif
@@ -599,6 +601,16 @@ ctl_table ipv4_table[] = {
.proc_handler   = proc_dointvec
},
{
+   .ctl_name   = CTL_UNNUMBERED,
+   .procname   = udp_mem,
+   .data   = sysctl_udp_mem,
+   .maxlen = sizeof(sysctl_udp_mem),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .strategy   = sysctl_intvec,
+   .extra1 = udp_mem_min
+   },
+   {
.ctl_name   = NET_TCP_APP_WIN,
.procname   = tcp_app_win,
.data   = sysctl_tcp_app_win,
Index: 2.6.23-udp_limit/net/ipv4/ip_output.c
===
--- 2.6.23-udp_limit.orig/net/ipv4/ip_output.c
+++ 2.6.23-udp_limit/net/ipv4/ip_output.c
@@ -75,6 +75,7 @@
 #include net/icmp.h
 #include net/checksum.h
 #include net/inetpeer.h
+#include net/udp.h
 #include linux/igmp.h
 #include linux/netfilter_ipv4.h
 #include linux/netfilter_bridge.h
@@ -699,6 +700,21 @@ csum_page(struct page *page, int offset,
return csum;
 }
 
+static inline int __ip_check_max_skb_pages(struct sock *sk, int size)
+{
+   switch(sk-sk_protocol) {
+   case IPPROTO_UDP:
+   if (sk-sk_prot-sysctl_mem[0]  UDP_MIN_SKB_PAGES)
+   if (atomic_read(sk-sk_prot-memory_allocated)+size
+   = sk-sk_prot-sysctl_mem[0])
+   return -ENOBUFS;
+   /* Fall through */  
+   default:
+   break;
+   }
+   return 0;
+}
+
 static inline int ip_ufo_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
   int odd, struct sk_buff *skb),
@@ -910,6 +926,12 @@ alloc_new_skb:
if (datalen == length + fraggap)
alloclen += rt-u.dst.trailer_len;
 
+   err = __ip_check_max_skb_pages(sk,
+   sk_datagram_pages(SKB_DATA_ALIGN(alloclen + 
hh_len + 15)
+   + sizeof(struct sk_buff)));
+   if (err)
+   goto error

Re: [RFC/PATCH 4/4] UDP memory usage accounting (take 4): memory limitation

2007-10-11 Thread Satoshi OSHIMA
Hi Stephen,

Thank you for your comment.

  {
 +.ctl_name   = NET_UDP_MEM,
 +.procname   = udp_mem,
 +.data   = sysctl_udp_mem,
 +.maxlen = sizeof(sysctl_udp_mem),
 +.mode   = 0644,
 +.proc_handler   = proc_dointvec
 +},
 +{
  .ctl_name   = NET_TCP_APP_WIN,
  .procname   = tcp_app_win,
  .data   = sysctl_tcp_app_win,
 
 if you use proc_dointvec_minmax, then you could inforce min/max
 values for udp_mem for the sysctl

udp_mem has two meanings:
* turn off this limitation function (currently udp_mem<=4096)
* limit udp memory (currently udp_mem>4096)

To realize this, the UDP and IP layers check whether udp_mem
is 4096 or smaller.

Even if udp_mem uses proc_dointvec_minmax or a dedicated proc handler,
the turn-off check must still be done in the UDP and IP layers. This means
the check in the UDP and IP layers cannot be reduced.


If you are pointing out that a negative value of udp_mem is strange,
I agree. I'll fix it.

How about this?
min=4096 (and turn off limitation)
udp_mem>4096 (and turn on limitation)


Satoshi Oshima

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC/PATCH 2/4] UDP memory usage accounting (take 4): accounting unit and variable

2007-10-10 Thread Satoshi OSHIMA
Hi Evgeniy,

Thank you for your comment.

 Hi.
 
 On Sat, Oct 06, 2007 at 12:01:07AM +0900, Satoshi OSHIMA ([EMAIL PROTECTED]) 
 wrote:
 --- 2.6.23-rc3-udp_limit.orig/net/ipv4/udp.c
 +++ 2.6.23-rc3-udp_limit/net/ipv4/udp.c
 @@ -113,6 +113,10 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_sta
  struct hlist_head udp_hash[UDP_HTABLE_SIZE];
  DEFINE_RWLOCK(udp_hash_lock);
  
 +atomic_t udp_memory_allocated;
 +
 +EXPORT_SYMBOL(udp_memory_allocated);
 +
 
 Why do you export this variable?
 It is not accessed from modules in your patchset.

Good point! I'll fix it.

Satoshi Oshima
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 1/4] UDP memory usage accounting (take 4): fix send buffer check

2007-10-05 Thread Satoshi OSHIMA
This patch introduces a sndbuf size check before
memory allocation for the send buffer.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc7-udp_limit/net/ipv4/ip_output.c
===
--- 2.6.23-rc7-udp_limit.orig/net/ipv4/ip_output.c
+++ 2.6.23-rc7-udp_limit/net/ipv4/ip_output.c
@@ -1004,6 +1004,11 @@ alloc_new_skb:
frag = skb_shinfo(skb)-frags[i];
}
} else if (i  MAX_SKB_FRAGS) {
+   if (atomic_read(sk-sk_wmem_alloc) + PAGE_SIZE
+2 * sk-sk_sndbuf) {
+   err = -ENOBUFS;
+   goto error;
+   }
if (copy  PAGE_SIZE)
copy = PAGE_SIZE;
page = alloc_pages(sk-sk_allocation, 0);

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 2/4] UDP memory usage accounting (take 4): accounting unit and variable

2007-10-05 Thread Satoshi OSHIMA
This patch introduces global variable for UDP memory accounting.
The unit is page.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc3-udp_limit/include/net/sock.h
===
--- 2.6.23-rc3-udp_limit.orig/include/net/sock.h
+++ 2.6.23-rc3-udp_limit/include/net/sock.h
@@ -723,6 +723,13 @@ static inline int sk_stream_wmem_schedul
   sk_stream_mem_schedule(sk, size, 0);
 }
 
+#define SK_DATAGRAM_MEM_QUANTUM ((int)PAGE_SIZE)
+
+static inline int sk_datagram_pages(int amt)
+{
+   return DIV_ROUND_UP(amt, SK_DATAGRAM_MEM_QUANTUM);
+}
+
 /* Used by processes to lock a socket state, so that
  * interrupts and bottom half handlers won't change it
  * from under us. It essentially blocks any incoming
Index: 2.6.23-rc3-udp_limit/include/net/udp.h
===
--- 2.6.23-rc3-udp_limit.orig/include/net/udp.h
+++ 2.6.23-rc3-udp_limit/include/net/udp.h
@@ -65,6 +65,8 @@ extern rwlock_t udp_hash_lock;
 
 extern struct proto udp_prot;
 
+extern atomic_t udp_memory_allocated;
+
 struct sk_buff;
 
 /*
Index: 2.6.23-rc3-udp_limit/net/ipv4/proc.c
===
--- 2.6.23-rc3-udp_limit.orig/net/ipv4/proc.c
+++ 2.6.23-rc3-udp_limit/net/ipv4/proc.c
@@ -66,7 +66,8 @@ static int sockstat_seq_show(struct seq_
   fold_prot_inuse(tcp_prot), atomic_read(tcp_orphan_count),
   tcp_death_row.tw_count, atomic_read(tcp_sockets_allocated),
   atomic_read(tcp_memory_allocated));
-   seq_printf(seq, UDP: inuse %d\n, fold_prot_inuse(udp_prot));
+   seq_printf(seq, UDP: inuse %d mem %d\n, fold_prot_inuse(udp_prot),
+  atomic_read(udp_memory_allocated));
seq_printf(seq, UDPLITE: inuse %d\n, fold_prot_inuse(udplite_prot));
seq_printf(seq, RAW: inuse %d\n, fold_prot_inuse(raw_prot));
seq_printf(seq,  FRAG: inuse %d memory %d\n, ip_frag_nqueues,
Index: 2.6.23-rc3-udp_limit/net/ipv4/udp.c
===
--- 2.6.23-rc3-udp_limit.orig/net/ipv4/udp.c
+++ 2.6.23-rc3-udp_limit/net/ipv4/udp.c
@@ -113,6 +113,10 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_sta
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
+atomic_t udp_memory_allocated;
+
+EXPORT_SYMBOL(udp_memory_allocated);
+
 static int udp_port_rover;
 
 static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head 
udptable[])

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 3/4] UDP memory usage accounting (take 4): memory usage accounting

2007-10-05 Thread Satoshi OSHIMA
This patch introduces memory usage accounting for UDP.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc8-udp_limit/net/ipv4/ip_output.c
===
--- 2.6.23-rc8-udp_limit.orig/net/ipv4/ip_output.c
+++ 2.6.23-rc8-udp_limit/net/ipv4/ip_output.c
@@ -743,6 +743,8 @@ static inline int ip_ufo_append_data(str
/* specify the length of each IP datagram fragment*/
skb_shinfo(skb)-gso_size = mtu - fragheaderlen;
skb_shinfo(skb)-gso_type = SKB_GSO_UDP;
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
__skb_queue_tail(sk-sk_write_queue, skb);
 
return 0;
@@ -924,6 +926,9 @@ alloc_new_skb:
}
if (skb == NULL)
goto error;
+   if (sk-sk_prot-memory_allocated)
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
 
/*
 *  Fill in the control structures
@@ -1023,6 +1028,8 @@ alloc_new_skb:
frag = skb_shinfo(skb)-frags[i];
skb-truesize += PAGE_SIZE;
atomic_add(PAGE_SIZE, sk-sk_wmem_alloc);
+   if (sk-sk_prot-memory_allocated)
+   
atomic_inc(sk-sk_prot-memory_allocated);
} else {
err = -EMSGSIZE;
goto error;
@@ -1123,7 +1130,9 @@ ssize_t   ip_append_page(struct sock *sk, 
if (unlikely(!skb)) {
err = -ENOBUFS;
goto error;
-   }
+   } else if (sk-sk_prot-memory_allocated)
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
 
/*
 *  Fill in the control structures
@@ -1202,13 +1211,14 @@ int ip_push_pending_frames(struct sock *
struct iphdr *iph;
__be16 df = 0;
__u8 ttl;
-   int err = 0;
+   int err = 0, send_page_size;
 
if ((skb = __skb_dequeue(sk-sk_write_queue)) == NULL)
goto out;
tail_skb = (skb_shinfo(skb)-frag_list);
 
/* move skb-data to ip header from ext header */
+   send_page_size = sk_datagram_pages(skb-truesize);
if (skb-data  skb_network_header(skb))
__skb_pull(skb, skb_network_offset(skb));
while ((tmp_skb = __skb_dequeue(sk-sk_write_queue)) != NULL) {
@@ -1218,6 +1228,7 @@ int ip_push_pending_frames(struct sock *
skb-len += tmp_skb-len;
skb-data_len += tmp_skb-len;
skb-truesize += tmp_skb-truesize;
+   send_page_size += sk_datagram_pages(tmp_skb-truesize);
__sock_put(tmp_skb-sk);
tmp_skb-destructor = NULL;
tmp_skb-sk = NULL;
@@ -1269,6 +1280,8 @@ int ip_push_pending_frames(struct sock *
/* Netfilter gets whole the not fragmented skb. */
err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
  skb-dst-dev, dst_output);
+   if (sk-sk_prot-memory_allocated)
+   atomic_sub(send_page_size, sk-sk_prot-memory_allocated);
if (err) {
if (err  0)
err = inet-recverr ? net_xmit_errno(err) : 0;
@@ -1298,9 +1311,15 @@ void ip_flush_pending_frames(struct sock
 {
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
+   int num_flush_mem = 0;
 
-   while ((skb = __skb_dequeue_tail(sk-sk_write_queue)) != NULL)
+   while ((skb = __skb_dequeue_tail(sk-sk_write_queue)) != NULL) {
+   num_flush_mem += sk_datagram_pages(skb-truesize);
kfree_skb(skb);
+   }
+
+   if (sk-sk_prot-memory_allocated)
+   atomic_sub(num_flush_mem, sk-sk_prot-memory_allocated);
 
inet-cork.flags = ~IPCORK_OPT;
kfree(inet-cork.opt);
Index: 2.6.23-rc8-udp_limit/net/ipv4/udp.c
===
--- 2.6.23-rc8-udp_limit.orig/net/ipv4/udp.c
+++ 2.6.23-rc8-udp_limit/net/ipv4/udp.c
@@ -887,6 +887,9 @@ try_again:
err = ulen;
 
 out_free:
+   atomic_sub(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
+
skb_free_datagram(sk, skb);
 out:
return err;
@@ -894,6 +897,9 @@ out:
 csum_copy_err:
UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
 
+   atomic_sub(sk_datagram_pages(skb-truesize),
+  sk-sk_prot

[RFC/PATCH 4/4] UDP memory usage accounting (take 4): memory limitation

2007-10-05 Thread Satoshi OSHIMA
This patch introduces memory limitation for UDP.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc9-udp_limit/include/net/udp.h
===
--- 2.6.23-rc9-udp_limit.orig/include/net/udp.h
+++ 2.6.23-rc9-udp_limit/include/net/udp.h
@@ -65,7 +65,10 @@ extern rwlock_t udp_hash_lock;
 
 extern struct proto udp_prot;
 
+/* Used by memory accounting and capping */
+#define UDP_MIN_SKB_PAGES  4096
 extern atomic_t udp_memory_allocated;
+extern int sysctl_udp_mem;
 
 struct sk_buff;
 
Index: 2.6.23-rc9-udp_limit/net/ipv4/udp.c
===
--- 2.6.23-rc9-udp_limit.orig/net/ipv4/udp.c
+++ 2.6.23-rc9-udp_limit/net/ipv4/udp.c
@@ -114,8 +114,10 @@ struct hlist_head udp_hash[UDP_HTABLE_SI
 DEFINE_RWLOCK(udp_hash_lock);
 
 atomic_t udp_memory_allocated;
+int sysctl_udp_mem = 0;
 
 EXPORT_SYMBOL(udp_memory_allocated);
+EXPORT_SYMBOL(sysctl_udp_mem);
 
 static int udp_port_rover;
 
@@ -1018,6 +1020,16 @@ int udp_queue_rcv_skb(struct sock * sk, 
goto drop;
}
 
+   if (sk-sk_prot-sysctl_mem[0]  UDP_MIN_SKB_PAGES) {
+   if ((atomic_read(sk-sk_prot-memory_allocated)
+  + sk_datagram_pages(skb-truesize))
+   = sk-sk_prot-sysctl_mem[0]) {
+   UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS,
+   up-pcflag);
+   goto drop;
+   }
+   }
+
if ((rc = sock_queue_rcv_skb(sk,skb))  0) {
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
@@ -1453,6 +1465,7 @@ struct proto udp_prot = {
.unhash= udp_lib_unhash,
.get_port  = udp_v4_get_port,
.memory_allocated  = udp_memory_allocated,
+   .sysctl_mem= sysctl_udp_mem,
.obj_size  = sizeof(struct udp_sock),
 #ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
Index: 2.6.23-rc9-udp_limit/net/ipv4/sysctl_net_ipv4.c
===
--- 2.6.23-rc9-udp_limit.orig/net/ipv4/sysctl_net_ipv4.c
+++ 2.6.23-rc9-udp_limit/net/ipv4/sysctl_net_ipv4.c
@@ -17,6 +17,7 @@
 #include net/ip.h
 #include net/route.h
 #include net/tcp.h
+#include net/udp.h
 #include net/cipso_ipv4.h
 
 /* From af_inet.c */
@@ -599,6 +600,14 @@ ctl_table ipv4_table[] = {
.proc_handler   = proc_dointvec
},
{
+   .ctl_name   = NET_UDP_MEM,
+   .procname   = udp_mem,
+   .data   = sysctl_udp_mem,
+   .maxlen = sizeof(sysctl_udp_mem),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec
+   },
+   {
.ctl_name   = NET_TCP_APP_WIN,
.procname   = tcp_app_win,
.data   = sysctl_tcp_app_win,
Index: 2.6.23-rc9-udp_limit/include/linux/sysctl.h
===
--- 2.6.23-rc9-udp_limit.orig/include/linux/sysctl.h
+++ 2.6.23-rc9-udp_limit/include/linux/sysctl.h
@@ -441,6 +441,7 @@ enum
NET_TCP_ALLOWED_CONG_CONTROL=123,
NET_TCP_MAX_SSTHRESH=124,
NET_TCP_FRTO_RESPONSE=125,
+   NET_UDP_MEM=126,
 };
 
 enum {
Index: 2.6.23-rc9-udp_limit/net/ipv4/ip_output.c
===
--- 2.6.23-rc9-udp_limit.orig/net/ipv4/ip_output.c
+++ 2.6.23-rc9-udp_limit/net/ipv4/ip_output.c
@@ -75,6 +75,7 @@
 #include net/icmp.h
 #include net/checksum.h
 #include net/inetpeer.h
+#include net/udp.h
 #include linux/igmp.h
 #include linux/netfilter_ipv4.h
 #include linux/netfilter_bridge.h
@@ -910,6 +911,17 @@ alloc_new_skb:
if (datalen == length + fraggap)
alloclen += rt-u.dst.trailer_len;
 
+   if (sk-sk_prot-sysctl_mem)
+   if (sk-sk_prot-sysctl_mem[0]  
UDP_MIN_SKB_PAGES)
+   if 
((atomic_read(sk-sk_prot-memory_allocated)
+  + sk_datagram_pages(
+  
SKB_DATA_ALIGN(alloclen + hh_len + 15)
++ sizeof(struct 
sk_buff)))
+= sk-sk_prot-sysctl_mem[0]) {
+   err = -ENOBUFS;
+   goto error;
+   }
+
if (transhdrlen) {
skb = sock_alloc_send_skb(sk,
alloclen + hh_len + 15,
@@ -1009,6 +1021,15 @@ alloc_new_skb

[RFC/PATCH 0/4]UDP memory accounting (take 4)

2007-10-05 Thread Satoshi OSHIMA
Hi,

I post a patch set of UDP memory accounting and
limitation.

This patch set is for kernel 2.6.23-rc9. The 
differences from take 3 are

* fixing double accounting bug of ip_send_page()
* adding UDP memory limitation.

How to use UDP memory limitation:

This patch set adds

/proc/sys/net/ipv4/udp_mem

as a tuning parameter. 

When you give the number that is greater than 4096,
UDP memory limitation will work. The number of pages
for socket buffer is limited up to udp_mem[pages].

Currently this function drops the packet when
it is sent or received and the number of pages for
socket buffer is beyond the limit. It won't collect
the buffer that is already allocated.

On the other hand, when udp_mem is set to 4096 or
smaller, UDP memory limitation is disabled.
The default value is 0.

Comment, review and test are welcome.


By the way, David pointed out that we should have
a better solution, such as memory reclaiming by a
callback from the vmm. I am seriously considering it. But
as Herbert pointed out, it is very difficult to
apply it to TCP because some of the buffers are
already acked. I couldn't find a good solution
that is applicable to TCP, UDP, the route cache and so on.

Let me know if you find a good way to solve
this problem.

Thanks,

Satoshi Oshima
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 0/3] UDP memory usage accounting (take 3)

2007-10-02 Thread Satoshi OSHIMA
This patch set tries to introduce memory usage accounting for
UDP (currently ipv4 only).

This is the second post of the take 2 patch, because the previous
post was broken by my MUA setting.

The only thing I changed is my MUA setting. There is no code
change from take 2.

This patch set is for 2.6.23-rc8.

I appreciate your comment/test/feedback.

Satoshi Oshima
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 1/3] UDP memory usage accounting (take 3): fix send buffer check

2007-10-02 Thread Satoshi OSHIMA
This patch introduces a sndbuf size check before
memory allocation for the send buffer.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc7-udp_limit/net/ipv4/ip_output.c
===
--- 2.6.23-rc7-udp_limit.orig/net/ipv4/ip_output.c
+++ 2.6.23-rc7-udp_limit/net/ipv4/ip_output.c
@@ -1004,6 +1004,11 @@ alloc_new_skb:
frag = skb_shinfo(skb)-frags[i];
}
} else if (i  MAX_SKB_FRAGS) {
+   if (atomic_read(sk-sk_wmem_alloc) + PAGE_SIZE
+2 * sk-sk_sndbuf) {
+   err = -ENOBUFS;
+   goto error;
+   }
if (copy  PAGE_SIZE)
copy = PAGE_SIZE;
page = alloc_pages(sk-sk_allocation, 0);
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 2/3] UDP memory usage accounting (take 3): accounting unit and variable

2007-10-02 Thread Satoshi OSHIMA
This patch introduces global variable for UDP memory accounting.
The unit is page.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc3-udp_limit/include/net/sock.h
===
--- 2.6.23-rc3-udp_limit.orig/include/net/sock.h
+++ 2.6.23-rc3-udp_limit/include/net/sock.h
@@ -723,6 +723,13 @@ static inline int sk_stream_wmem_schedul
   sk_stream_mem_schedule(sk, size, 0);
 }
 
+#define SK_DATAGRAM_MEM_QUANTUM ((int)PAGE_SIZE)
+
+static inline int sk_datagram_pages(int amt)
+{
+   return DIV_ROUND_UP(amt, SK_DATAGRAM_MEM_QUANTUM);
+}
+
 /* Used by processes to lock a socket state, so that
  * interrupts and bottom half handlers won't change it
  * from under us. It essentially blocks any incoming
Index: 2.6.23-rc3-udp_limit/include/net/udp.h
===
--- 2.6.23-rc3-udp_limit.orig/include/net/udp.h
+++ 2.6.23-rc3-udp_limit/include/net/udp.h
@@ -65,6 +65,8 @@ extern rwlock_t udp_hash_lock;
 
 extern struct proto udp_prot;
 
+extern atomic_t udp_memory_allocated;
+
 struct sk_buff;
 
 /*
Index: 2.6.23-rc3-udp_limit/net/ipv4/proc.c
===
--- 2.6.23-rc3-udp_limit.orig/net/ipv4/proc.c
+++ 2.6.23-rc3-udp_limit/net/ipv4/proc.c
@@ -66,7 +66,8 @@ static int sockstat_seq_show(struct seq_
   fold_prot_inuse(tcp_prot), atomic_read(tcp_orphan_count),
   tcp_death_row.tw_count, atomic_read(tcp_sockets_allocated),
   atomic_read(tcp_memory_allocated));
-   seq_printf(seq, UDP: inuse %d\n, fold_prot_inuse(udp_prot));
+   seq_printf(seq, UDP: inuse %d mem %d\n, fold_prot_inuse(udp_prot),
+  atomic_read(udp_memory_allocated));
seq_printf(seq, UDPLITE: inuse %d\n, fold_prot_inuse(udplite_prot));
seq_printf(seq, RAW: inuse %d\n, fold_prot_inuse(raw_prot));
seq_printf(seq,  FRAG: inuse %d memory %d\n, ip_frag_nqueues,
Index: 2.6.23-rc3-udp_limit/net/ipv4/udp.c
===
--- 2.6.23-rc3-udp_limit.orig/net/ipv4/udp.c
+++ 2.6.23-rc3-udp_limit/net/ipv4/udp.c
@@ -113,6 +113,10 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_sta
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
+atomic_t udp_memory_allocated;
+
+EXPORT_SYMBOL(udp_memory_allocated);
+
 static int udp_port_rover;
 
 static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head 
udptable[])
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 3/3] UDP memory usage accounting (take 3): measurement

2007-10-02 Thread Satoshi OSHIMA
This patch introduces memory usage measurement for UDP.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc8-udp_limit/net/ipv4/ip_output.c
===
--- 2.6.23-rc8-udp_limit.orig/net/ipv4/ip_output.c
+++ 2.6.23-rc8-udp_limit/net/ipv4/ip_output.c
@@ -743,6 +743,8 @@ static inline int ip_ufo_append_data(str
/* specify the length of each IP datagram fragment*/
skb_shinfo(skb)-gso_size = mtu - fragheaderlen;
skb_shinfo(skb)-gso_type = SKB_GSO_UDP;
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
__skb_queue_tail(sk-sk_write_queue, skb);
 
return 0;
@@ -924,6 +926,9 @@ alloc_new_skb:
}
if (skb == NULL)
goto error;
+   if (sk-sk_prot-memory_allocated)
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
 
/*
 *  Fill in the control structures
@@ -1023,6 +1028,8 @@ alloc_new_skb:
frag = skb_shinfo(skb)-frags[i];
skb-truesize += PAGE_SIZE;
atomic_add(PAGE_SIZE, sk-sk_wmem_alloc);
+   if (sk-sk_prot-memory_allocated)
+   
atomic_inc(sk-sk_prot-memory_allocated);
} else {
err = -EMSGSIZE;
goto error;
@@ -1123,7 +1130,9 @@ ssize_t   ip_append_page(struct sock *sk, 
if (unlikely(!skb)) {
err = -ENOBUFS;
goto error;
-   }
+   } else if (sk-sk_prot-memory_allocated)
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
 
/*
 *  Fill in the control structures
@@ -1152,6 +1161,8 @@ ssize_t   ip_append_page(struct sock *sk, 
/*
 * Put the packet on the pending queue.
 */
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
__skb_queue_tail(sk-sk_write_queue, skb);
continue;
}
@@ -1202,13 +1213,14 @@ int ip_push_pending_frames(struct sock *
struct iphdr *iph;
__be16 df = 0;
__u8 ttl;
-   int err = 0;
+   int err = 0, send_page_size;
 
if ((skb = __skb_dequeue(sk-sk_write_queue)) == NULL)
goto out;
tail_skb = (skb_shinfo(skb)-frag_list);
 
/* move skb-data to ip header from ext header */
+   send_page_size = sk_datagram_pages(skb-truesize);
if (skb-data  skb_network_header(skb))
__skb_pull(skb, skb_network_offset(skb));
while ((tmp_skb = __skb_dequeue(sk-sk_write_queue)) != NULL) {
@@ -1218,6 +1230,7 @@ int ip_push_pending_frames(struct sock *
skb-len += tmp_skb-len;
skb-data_len += tmp_skb-len;
skb-truesize += tmp_skb-truesize;
+   send_page_size += sk_datagram_pages(tmp_skb-truesize);
__sock_put(tmp_skb-sk);
tmp_skb-destructor = NULL;
tmp_skb-sk = NULL;
@@ -1269,6 +1282,8 @@ int ip_push_pending_frames(struct sock *
/* Netfilter gets whole the not fragmented skb. */
err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
  skb-dst-dev, dst_output);
+   if (sk-sk_prot-memory_allocated)
+   atomic_sub(send_page_size, sk-sk_prot-memory_allocated);
if (err) {
if (err  0)
err = inet-recverr ? net_xmit_errno(err) : 0;
@@ -1298,9 +1313,15 @@ void ip_flush_pending_frames(struct sock
 {
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
+   int num_flush_mem = 0;
 
-   while ((skb = __skb_dequeue_tail(sk-sk_write_queue)) != NULL)
+   while ((skb = __skb_dequeue_tail(sk-sk_write_queue)) != NULL) {
+   num_flush_mem += sk_datagram_pages(skb-truesize);
kfree_skb(skb);
+   }
+
+   if (sk-sk_prot-memory_allocated)
+   atomic_sub(num_flush_mem, sk-sk_prot-memory_allocated);
 
inet-cork.flags = ~IPCORK_OPT;
kfree(inet-cork.opt);
Index: 2.6.23-rc8-udp_limit/net/ipv4/udp.c
===
--- 2.6.23-rc8-udp_limit.orig/net/ipv4/udp.c
+++ 2.6.23-rc8-udp_limit

Re: [RFC/PATCH 3/3] UDP memory usage accounting (take 2): measurement

2007-10-01 Thread Satoshi OSHIMA
Evgeniy Polyakov wrote:
 On Fri, Sep 28, 2007 at 10:41:31PM +0900, Satoshi OSHIMA
([EMAIL PROTECTED]) wrote:
 This patch introduces memory usage measurement for UDP.

 These 3 points were updated.

 - UDP specific codes in IP layer were removed.

 - atomic_sub() in a loop was removed

 - accounting during socket destruction

 Another approach is to account only at the highest UDP layer and having
 datagram skb destructor just like it is done in TCP, but this approach
 is also resonable.


This patch set tries to introduce memory accounting by the page,
because that is what TCP does. Also, ip_append_data() merges payloads
into a sk_buff if the previous sk_buff has enough space. The problem is
that udp_append_data() cannot tell whether this merge happens or not.

If the accounting must be in UDP layer, we need to change
the interface of ip_append_data() to know this merge happens.

Once the interface is changed, we have to maintain other
protocol stacks to keep up with the change.

But I didn't want to do it to keep this patch set small
in the first step.


 I already told that patches 1 and 3 have broken indent, please fix that.

Oops! I will fix that.


 A hint: when you are about to submit something network related for
inclusion,
 and strongly believes it is ready, it can be a not that bad idea to add
 David Miller [EMAIL PROTECTED] to copy list, he can complain about
 backlog and so on, but will read you mail twice :) but do not tell anyone.

Thank you for your advice. I will do that!

Satoshi Oshima
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC/PATCH 0/3] UDP memory usage accounting

2007-10-01 Thread Satoshi OSHIMA
Herbert Xu wrote:
 On Fri, Sep 28, 2007 at 09:51:59PM -0700, David Miller wrote:
 There is a per-socket send buffer limit, and there is a per-user open
 file descriptor limit.  Multiply the two to determine how much system
 memory the user can consume using sockets.

 We do have these limits but they're per-process, not per-user.
 Unless you lock down the number of processes each user can have
 to no more than a handful then this is basically useless.

 For example, let's say each socket can lock down 64K of kernel
 memory (which is quite easy to do BTW, just open a TCP/UDP socket,
 send data to it from another socket but keep the data in the
 socket by not calling recvmsg), and that each process can have
 1024 file descriptors (the default), then each process can pin

 64K x 1024 = 64M

 of memory.  So if the user can have 10 processes, then that's
 640M of kernel memory that can be pinned down.  Usually the
 process limit is at least 10 times higher.

Thank you very much for your comment.

What you pointed out is my motivation to make this patch.
I think that per-process limits won't help to solve this
problem.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC/PATCH 0/3] UDP memory usage accounting

2007-10-01 Thread Satoshi OSHIMA
 On Fri, Sep 28, 2007 at 09:47:37PM -0700, David Miller wrote:
 There are two things we (might) need to guard against, one local and
 one remote.

 Right I was focusing on the local threat.

 If you do a per-user limit, apache would basically just stop at that
 redzone point.  In some sense making the attack more effective because
 then it's trivial to shut down an entire web server this way.

 Having a per-user limit doesn't necessarily mean that we have
 to apply the limit differently to how we apply the system-wide
 limits.  We could keep exactly the same code as we have now but
 check against a per-user limit instead of a system-wide one.

 In other words your apache scenario will continue to work as is
 even with a per-user limit.

I'm afraid that a per-user limit won't work for the system administrator,
because he can't know who the rogue user is in advance (before
such an attack is made). And once the attack is made, the system will
not respond because of the lack of memory for slab.

So if he only has a per-user limit, he needs to split the memory
budget for UDP among the users. The limit per user will be very
small if the number of users in the system is large.


 Now where it does become useful is when we have a rogue local
 user.  As it is that user can chew up all of the budgeted TCP
 memory by simply not calling recvmsg.  As I've stated in the
 other email, the existing rlimits don't help because they're
 per-process rather than per-user.

 BTW, this is not fatal for TCP because TCP provides a minimum
 amount of memory for each socket even when we are over the
 limit.  However, if we this was implemented for UDP without
 a minimum guarantee then it'd be quite useless.

Hmm, I didn't realize that. Thank you for your good
suggestion. I will think of it.


 I see no valid argument against doing something similar for sockets.
 Such a register_shrinker() handler for TCP could, for example, look
 for TCP flows which haven't made forward progress in more than a
 certain amount of time and attempt to trim SKB memory from them.

 Yes I agree this would be quite useful for sending.  However, it'll
 be tough to shrink skbs that we've already acked for but the app
 for some reason has decided to leave in the socket by not calling
 recvmsg.

 UDP and other datagram sockets are troublesome because the memory
 gets wholly tied up immediately during the send call and it's not
 easy to liberate anything.  The nice part about datagram sockets,
 however, is that they make forward progress quickly and their
 memory is liberated as soon as the device transmits the packet.
 They don't have to wait for ACKs, windows openning up, or anything
 like that to happen.

 Agreed.  Also the recvmsg case I've described above is much
 simpler for UDP as we can just go through all the sockets and
 free skbs at random :)

 To be honest I don't even think UDP is much of a real problem for this
 reason.

 It's not a hard problem but we do need to have some code for it.

I believe so. Currently, a nasty user can easily stop the system
without root privilege. This may not be a serious problem, but
it is a problem that should be fixed.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 3/3] UDP memory usage accounting (take 2): measurement

2007-09-28 Thread Satoshi OSHIMA
This patch introduces memory usage measurement for UDP.

These 3 points were updated.

- UDP specific codes in IP layer were removed.

- atomic_sub() in a loop was removed

- accounting during socket destruction

signed-off-by: Satoshi Oshima [EMAIL PROTECTED]

signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc8-udp_limit/net/ipv4/ip_output.c

===

--- 2.6.23-rc8-udp_limit.orig/net/ipv4/ip_output.c

+++ 2.6.23-rc8-udp_limit/net/ipv4/ip_output.c

@@ -743,6 +743,8 @@ static inline int ip_ufo_append_data(str

 /* specify the length of each IP datagram fragment*/

 skb_shinfo(skb)-gso_size = mtu - fragheaderlen;

 skb_shinfo(skb)-gso_type = SKB_GSO_UDP;

+atomic_add(sk_datagram_pages(skb-truesize),

+   sk-sk_prot-memory_allocated);

 __skb_queue_tail(sk-sk_write_queue, skb);

 

 return 0;

@@ -924,6 +926,9 @@ alloc_new_skb:

 }

 if (skb == NULL)

 goto error;

+if (sk-sk_prot-memory_allocated)

+atomic_add(sk_datagram_pages(skb-truesize),

+   sk-sk_prot-memory_allocated);

 

 /*

  *Fill in the control structures

@@ -1023,6 +1028,8 @@ alloc_new_skb:

 frag = skb_shinfo(skb)-frags[i];

 skb-truesize += PAGE_SIZE;

 atomic_add(PAGE_SIZE, sk-sk_wmem_alloc);

+if (sk-sk_prot-memory_allocated)

+atomic_inc(sk-sk_prot-memory_allocated);

 } else {

 err = -EMSGSIZE;

 goto error;

@@ -1123,7 +1130,9 @@ ssize_tip_append_page(struct sock *sk, 

 if (unlikely(!skb)) {

 err = -ENOBUFS;

 goto error;

-}

+} else if (sk-sk_prot-memory_allocated)

+atomic_add(sk_datagram_pages(skb-truesize),

+   sk-sk_prot-memory_allocated);

 

 /*

  *Fill in the control structures

@@ -1152,6 +1161,8 @@ ssize_tip_append_page(struct sock *sk, 

 /*

  * Put the packet on the pending queue.

  */

+atomic_add(sk_datagram_pages(skb-truesize),

+   sk-sk_prot-memory_allocated);

 __skb_queue_tail(sk-sk_write_queue, skb);

 continue;

 }

@@ -1202,13 +1213,14 @@ int ip_push_pending_frames(struct sock *

 struct iphdr *iph;

 __be16 df = 0;

 __u8 ttl;

-int err = 0;

+int err = 0, send_page_size;

 

 if ((skb = __skb_dequeue(sk-sk_write_queue)) == NULL)

 goto out;

 tail_skb = (skb_shinfo(skb)-frag_list);

 

 /* move skb-data to ip header from ext header */

+send_page_size = sk_datagram_pages(skb-truesize);

 if (skb-data  skb_network_header(skb))

 __skb_pull(skb, skb_network_offset(skb));

 while ((tmp_skb = __skb_dequeue(sk-sk_write_queue)) != NULL) {

@@ -1218,6 +1230,7 @@ int ip_push_pending_frames(struct sock *

 skb-len += tmp_skb-len;

 skb-data_len += tmp_skb-len;

 skb-truesize += tmp_skb-truesize;

+send_page_size += sk_datagram_pages(tmp_skb-truesize);

 __sock_put(tmp_skb-sk);

 tmp_skb-destructor = NULL;

 tmp_skb-sk = NULL;

@@ -1269,6 +1282,8 @@ int ip_push_pending_frames(struct sock *

 /* Netfilter gets whole the not fragmented skb. */

 err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,

   skb-dst-dev, dst_output);

+if (sk-sk_prot-memory_allocated)

+atomic_sub(send_page_size, sk-sk_prot-memory_allocated);

 if (err) {

 if (err  0)

 err = inet-recverr ? net_xmit_errno(err) : 0;

@@ -1298,9 +1313,15 @@ void ip_flush_pending_frames(struct sock

 {

 struct inet_sock *inet = inet_sk(sk);

 struct sk_buff *skb;

+int num_flush_mem = 0;

 

-while ((skb = __skb_dequeue_tail(sk-sk_write_queue)) != NULL)

+while ((skb = __skb_dequeue_tail(sk-sk_write_queue)) != NULL) {

+num_flush_mem += sk_datagram_pages(skb-truesize);

 kfree_skb(skb);

+}

+

+if (sk-sk_prot-memory_allocated)

+atomic_sub(num_flush_mem, sk-sk_prot-memory_allocated);

 

 inet-cork.flags = ~IPCORK_OPT;

 kfree(inet-cork.opt);

Index: 2.6.23-rc8-udp_limit/net/ipv4/udp.c

===

--- 2.6.23-rc8-udp_limit.orig/net/ipv4/udp.c

+++ 2.6.23-rc8-udp_limit/net/ipv4/udp.c

@@ -887,6 +887,9 @@ try_again:

 err = ulen;

 

 out_free:

+atomic_sub(sk_datagram_pages(skb-truesize),

+   sk-sk_prot-memory_allocated);

+

 skb_free_datagram(sk, skb);

 out:

 return err;

@@ -894,6 +897,9 @@ out:

 csum_copy_err:

 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);

 

+atomic_sub(sk_datagram_pages(skb-truesize),

+   sk-sk_prot

[RFC/PATCH 2/3] UDP memory usage accounting: accounting unit and variable

2007-09-28 Thread Satoshi OSHIMA
This patch introduces global variable for UDP memory accounting.
The unit is page.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc3-udp_limit/include/net/sock.h
===
--- 2.6.23-rc3-udp_limit.orig/include/net/sock.h
+++ 2.6.23-rc3-udp_limit/include/net/sock.h
@@ -723,6 +723,13 @@ static inline int sk_stream_wmem_schedul
   sk_stream_mem_schedule(sk, size, 0);
 }
 
+#define SK_DATAGRAM_MEM_QUANTUM ((int)PAGE_SIZE)
+
+static inline int sk_datagram_pages(int amt)
+{
+   return DIV_ROUND_UP(amt, SK_DATAGRAM_MEM_QUANTUM);
+}
+
 /* Used by processes to lock a socket state, so that
  * interrupts and bottom half handlers won't change it
  * from under us. It essentially blocks any incoming
Index: 2.6.23-rc3-udp_limit/include/net/udp.h
===
--- 2.6.23-rc3-udp_limit.orig/include/net/udp.h
+++ 2.6.23-rc3-udp_limit/include/net/udp.h
@@ -65,6 +65,8 @@ extern rwlock_t udp_hash_lock;
 
 extern struct proto udp_prot;
 
+extern atomic_t udp_memory_allocated;
+
 struct sk_buff;
 
 /*
Index: 2.6.23-rc3-udp_limit/net/ipv4/proc.c
===
--- 2.6.23-rc3-udp_limit.orig/net/ipv4/proc.c
+++ 2.6.23-rc3-udp_limit/net/ipv4/proc.c
@@ -66,7 +66,8 @@ static int sockstat_seq_show(struct seq_
   fold_prot_inuse(tcp_prot), atomic_read(tcp_orphan_count),
   tcp_death_row.tw_count, atomic_read(tcp_sockets_allocated),
   atomic_read(tcp_memory_allocated));
-   seq_printf(seq, UDP: inuse %d\n, fold_prot_inuse(udp_prot));
+   seq_printf(seq, UDP: inuse %d mem %d\n, fold_prot_inuse(udp_prot),
+  atomic_read(udp_memory_allocated));
seq_printf(seq, UDPLITE: inuse %d\n, fold_prot_inuse(udplite_prot));
seq_printf(seq, RAW: inuse %d\n, fold_prot_inuse(raw_prot));
seq_printf(seq,  FRAG: inuse %d memory %d\n, ip_frag_nqueues,
Index: 2.6.23-rc3-udp_limit/net/ipv4/udp.c
===
--- 2.6.23-rc3-udp_limit.orig/net/ipv4/udp.c
+++ 2.6.23-rc3-udp_limit/net/ipv4/udp.c
@@ -113,6 +113,10 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_sta
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
+atomic_t udp_memory_allocated;
+
+EXPORT_SYMBOL(udp_memory_allocated);
+
 static int udp_port_rover;
 
 static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head 
udptable[])

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC/PATCH 0/3] UDP memory usage accounting

2007-09-28 Thread Satoshi OSHIMA
Hi,

Thank you for your comment.


Evgeniy Polyakov wrote:
 Hi.

 On Fri, Sep 21, 2007 at 09:18:07PM +0900, Satoshi OSHIMA
([EMAIL PROTECTED]) wrote:
 This patch set try to introduce memory usage accounting for
 UDP(currently ipv4 only).

 Currently, memory usage of UDP can be observed as the sam of
 usage of tx_queue and rx_queue. But I believe that the system
 wide accounting is usefull when heavy loaded condition.

 In the next step, I would like to add memory usage quota
 for UDP to avoid unlimited memory consumption problem
 under DDOS attack.

 Could you please desribed such attack in more details?
 Each UDP socket has its queue length which can not be exceeded
 (roughly), no new sockets are created when remote side sends a packet
 (like after special steps in TCP), so where is possibility to eat all
 the mem?

For example, sk_buff is put on the slab and
slab can be acquired only from ZONE_NORMAL in i386.

In such a case, 300 to 500MB of memory consumption will
be fatal. Users can easily open 1000 sockets per process
under the default ulimit. If such sockets hold messages but
the user processes don't receive them, almost all of the slab
will be occupied by sk_buffs.


 This patch set is for 2.6.23-rc7.

 I seriously doubt you want to put udp specific hacks and zillions of
 atomic ops all around the code just to know exact number of bytes eaten
 for UDP.
 Please use udp specific code (like udp_sendmsg()) for proper accounting
 if you need that, but not hacks in generic ip code.

I couldn't find the way to account UDP memory consumption
in UDP layer.

In receive path, accounting can be done in UDP layer
because sk_buff is marked for UDP in UDP layer and it is
released in UDP layer.

In the send path, the sk_buff is acquired in the IP layer and also
released in the IP layer. In particular, there is a possibility
of appending data to the preceding sk_buff in the send
queue.

On the other hand, I agree that UDP specific code
in IP layer is not preferable. So I generalize UDP
specific code in IP layer in take 2.

Could you take a look at my take 2 patch set?

Satoshi Oshima

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 1/3] UDP memory usage accounting (take 2): fix send buffer check

2007-09-28 Thread Satoshi OSHIMA
This patch introduces sndbuf size check before

memory allocation for send buffer.

signed-off-by: Satoshi Oshima [EMAIL PROTECTED]

signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc7-udp_limit/net/ipv4/ip_output.c

===

--- 2.6.23-rc7-udp_limit.orig/net/ipv4/ip_output.c

+++ 2.6.23-rc7-udp_limit/net/ipv4/ip_output.c

@@ -1004,6 +1004,11 @@ alloc_new_skb:

 frag = skb_shinfo(skb)-frags[i];

 }

 } else if (i  MAX_SKB_FRAGS) {

+if (atomic_read(sk-sk_wmem_alloc) + PAGE_SIZE

+ 2 * sk-sk_sndbuf) {

+err = -ENOBUFS;

+goto error;

+}

 if (copy  PAGE_SIZE)

 copy = PAGE_SIZE;

 page = alloc_pages(sk-sk_allocation, 0);

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC/PATCH 2/3] UDP memory usage accounting: accounting unit and variable

2007-09-28 Thread Satoshi OSHIMA
Hi,

Thank you for your comment.

Andi Kleen wrote:
 Satoshi OSHIMA [EMAIL PROTECTED] writes:

 This patch introduces global variable for UDP memory accounting.
 The unit is page.

 The global variable doesn't seem to be very MP scalable, especially
 if you change it for each packet. This will be a very hot cache line,
 in the worst case bouncing around a large machine.

I understand what you pointed out. But I think the accounting
method I'm proposing is very similar to TCP accounting and
per-socket accounting.
What do you think of it?


 Possible alternatives:
 - Per CPU variables

I'm afraid that sockets and socket buffers are handled on
various CPUs. I mean that socket creation might be done
on CPU-A but socket receiving might be done on CPU-B.

And per-CPU variables must be summed up whenever the socket
cap is checked. I'm afraid that per-CPU variables are
also costly.


 - You only change the global on socket creation time (by pre
allocating a large
 amount) or when the system comes under memory pressure.
 - Batching of the global updates for multiple packets [that's a variant
 of the previous one, might be still too costly though]

 Also for such variables it's usually good to cache line pad them on SMP
 to avoid false sharing with something else.

I believe that memory usage accounting should be done accurately.
Currently I can't see how we can get accurate memory
accounting if it is done only when the system is under memory pressure.

But I revised the patch to avoid some atomic operations.

If I could find the good way to avoid atomic operation more,
I will add it.

Satoshi Oshima
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 0/3] UDP memory usage accounting(take 2)

2007-09-28 Thread Satoshi OSHIMA
This patch set tries to introduce memory usage accounting for

UDP(currently ipv4 only).


3 points are improved along with some feedback.


(a) to improve scalability, avoiding atomic_*()s as much as

possible

(b) avoiding UDP specific code in IP layer

(c) supporting socket destruction accounting


To implement (b), there is a side effect which affects

accounting on TCP socket. If you find the good solution

to avoid this side effect, please let me know.


Unfortunately, I don't have any NIC with UFO.

So this patch set is not tested with UFO supported

device.


This patch set is for 2.6.23-rc8.


I appreciate your comment/test/feedback.


Satoshi Oshima

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 0/3] UDP memory usage accounting

2007-09-21 Thread Satoshi OSHIMA
This patch set tries to introduce memory usage accounting for
UDP (currently ipv4 only).

Currently, memory usage of UDP can be observed as the sum of
usage of tx_queue and rx_queue. But I believe that system-wide
accounting is useful under heavily loaded conditions.

In the next step, I would like to add memory usage quota
for UDP to avoid unlimited memory consumption problem
under DDOS attack.

This patch set is for 2.6.23-rc7.

Unfortunately, I don't have any NIC with UFO.
So this patch set is not tested with UFO supported
device.

I appreciate your comment/test/feedback.

Satoshi Oshima
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 3/3] UDP memory usage accounting: measurement

2007-09-21 Thread Satoshi OSHIMA
This patch introduces memory usage measurement for UDP.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc7-udp_limit/net/ipv4/ip_output.c
===
--- 2.6.23-rc7-udp_limit.orig/net/ipv4/ip_output.c
+++ 2.6.23-rc7-udp_limit/net/ipv4/ip_output.c
@@ -743,6 +743,8 @@ static inline int ip_ufo_append_data(str
/* specify the length of each IP datagram fragment*/
skb_shinfo(skb)-gso_size = mtu - fragheaderlen;
skb_shinfo(skb)-gso_type = SKB_GSO_UDP;
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
__skb_queue_tail(sk-sk_write_queue, skb);
 
return 0;
@@ -924,6 +926,9 @@ alloc_new_skb:
}
if (skb == NULL)
goto error;
+   if (sk-sk_protocol == IPPROTO_UDP)
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
 
/*
 *  Fill in the control structures
@@ -1023,6 +1028,8 @@ alloc_new_skb:
frag = skb_shinfo(skb)-frags[i];
skb-truesize += PAGE_SIZE;
atomic_add(PAGE_SIZE, sk-sk_wmem_alloc);
+   if (sk-sk_protocol == IPPROTO_UDP)
+   
atomic_inc(sk-sk_prot-memory_allocated);
} else {
err = -EMSGSIZE;
goto error;
@@ -1123,7 +1130,9 @@ ssize_t   ip_append_page(struct sock *sk, 
if (unlikely(!skb)) {
err = -ENOBUFS;
goto error;
-   }
+   } else if (sk-sk_protocol == IPPROTO_UDP)
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
 
/*
 *  Fill in the control structures
@@ -1152,6 +1161,8 @@ ssize_t   ip_append_page(struct sock *sk, 
/*
 * Put the packet on the pending queue.
 */
+   atomic_add(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
__skb_queue_tail(sk-sk_write_queue, skb);
continue;
}
@@ -1202,13 +1213,14 @@ int ip_push_pending_frames(struct sock *
struct iphdr *iph;
__be16 df = 0;
__u8 ttl;
-   int err = 0;
+   int err = 0, send_page_size;
 
if ((skb = __skb_dequeue(sk-sk_write_queue)) == NULL)
goto out;
tail_skb = (skb_shinfo(skb)-frag_list);
 
/* move skb-data to ip header from ext header */
+   send_page_size = sk_datagram_pages(skb-truesize);
if (skb-data  skb_network_header(skb))
__skb_pull(skb, skb_network_offset(skb));
while ((tmp_skb = __skb_dequeue(sk-sk_write_queue)) != NULL) {
@@ -1218,6 +1230,7 @@ int ip_push_pending_frames(struct sock *
skb-len += tmp_skb-len;
skb-data_len += tmp_skb-len;
skb-truesize += tmp_skb-truesize;
+   send_page_size += sk_datagram_pages(tmp_skb-truesize);
__sock_put(tmp_skb-sk);
tmp_skb-destructor = NULL;
tmp_skb-sk = NULL;
@@ -1269,6 +1282,8 @@ int ip_push_pending_frames(struct sock *
/* Netfilter gets whole the not fragmented skb. */
err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
  skb-dst-dev, dst_output);
+   if (sk-sk_protocol == IPPROTO_UDP)
+   atomic_sub(send_page_size, sk-sk_prot-memory_allocated);
if (err) {
if (err  0)
err = inet-recverr ? net_xmit_errno(err) : 0;
@@ -1299,8 +1314,12 @@ void ip_flush_pending_frames(struct sock
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
 
-   while ((skb = __skb_dequeue_tail(sk-sk_write_queue)) != NULL)
+   while ((skb = __skb_dequeue_tail(sk-sk_write_queue)) != NULL) {
+   if (sk-sk_protocol == IPPROTO_UDP)
+   atomic_sub(sk_datagram_pages(skb-truesize),
+  sk-sk_prot-memory_allocated);
kfree_skb(skb);
+   }
 
inet-cork.flags = ~IPCORK_OPT;
kfree(inet-cork.opt);
Index: 2.6.23-rc7-udp_limit/net/ipv4/udp.c
===
--- 2.6.23-rc7-udp_limit.orig/net/ipv4/udp.c
+++ 2.6.23-rc7-udp_limit/net/ipv4/udp.c

[RFC/PATCH 1/3] UDP memory usage accounting: fix send buffer check

2007-09-21 Thread Satoshi OSHIMA
This patch introduces a sndbuf size check before
memory allocation for the send buffer.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc7-udp_limit/net/ipv4/ip_output.c
===
--- 2.6.23-rc7-udp_limit.orig/net/ipv4/ip_output.c
+++ 2.6.23-rc7-udp_limit/net/ipv4/ip_output.c
@@ -1004,6 +1004,11 @@ alloc_new_skb:
frag = skb_shinfo(skb)-frags[i];
}
} else if (i  MAX_SKB_FRAGS) {
+   if (atomic_read(sk-sk_wmem_alloc) + PAGE_SIZE
+2 * sk-sk_sndbuf) {
+   err = -ENOBUFS;
+   goto error;
+   }
if (copy  PAGE_SIZE)
copy = PAGE_SIZE;
page = alloc_pages(sk-sk_allocation, 0);

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 2/3] UDP memory usage accounting: accounting unit and variable

2007-09-21 Thread Satoshi OSHIMA
This patch introduces global variable for UDP memory accounting.
The unit is page.


signed-off-by: Satoshi Oshima [EMAIL PROTECTED]
signed-off-by: Hideo Aoki [EMAIL PROTECTED]

Index: 2.6.23-rc3-udp_limit/include/net/sock.h
===
--- 2.6.23-rc3-udp_limit.orig/include/net/sock.h
+++ 2.6.23-rc3-udp_limit/include/net/sock.h
@@ -723,6 +723,13 @@ static inline int sk_stream_wmem_schedul
sk_stream_mem_schedule(sk, size, 0);
}

+#define SK_DATAGRAM_MEM_QUANTUM ((int)PAGE_SIZE)
+
+static inline int sk_datagram_pages(int amt)
+{
+ return DIV_ROUND_UP(amt, SK_DATAGRAM_MEM_QUANTUM);
+}
+
/* Used by processes to lock a socket state, so that
* interrupts and bottom half handlers won't change it
* from under us. It essentially blocks any incoming
Index: 2.6.23-rc3-udp_limit/include/net/udp.h
===
--- 2.6.23-rc3-udp_limit.orig/include/net/udp.h
+++ 2.6.23-rc3-udp_limit/include/net/udp.h
@@ -65,6 +65,8 @@ extern rwlock_t udp_hash_lock;

extern struct proto udp_prot;

+extern atomic_t udp_memory_allocated;
+
struct sk_buff;

/*
Index: 2.6.23-rc3-udp_limit/net/ipv4/proc.c
===
--- 2.6.23-rc3-udp_limit.orig/net/ipv4/proc.c
+++ 2.6.23-rc3-udp_limit/net/ipv4/proc.c
@@ -66,7 +66,8 @@ static int sockstat_seq_show(struct seq_
fold_prot_inuse(tcp_prot), atomic_read(tcp_orphan_count),
tcp_death_row.tw_count, atomic_read(tcp_sockets_allocated),
atomic_read(tcp_memory_allocated));
- seq_printf(seq, UDP: inuse %d\n, fold_prot_inuse(udp_prot));
+ seq_printf(seq, UDP: inuse %d mem %d\n, fold_prot_inuse(udp_prot),
+ atomic_read(udp_memory_allocated));
seq_printf(seq, UDPLITE: inuse %d\n, fold_prot_inuse(udplite_prot));
seq_printf(seq, RAW: inuse %d\n, fold_prot_inuse(raw_prot));
seq_printf(seq, FRAG: inuse %d memory %d\n, ip_frag_nqueues,
Index: 2.6.23-rc3-udp_limit/net/ipv4/udp.c
===
--- 2.6.23-rc3-udp_limit.orig/net/ipv4/udp.c
+++ 2.6.23-rc3-udp_limit/net/ipv4/udp.c
@@ -113,6 +113,10 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_sta
struct hlist_head udp_hash[UDP_HTABLE_SIZE];
DEFINE_RWLOCK(udp_hash_lock);

+atomic_t udp_memory_allocated;
+
+EXPORT_SYMBOL(udp_memory_allocated);
+
static int udp_port_rover;

static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head
udptable[])

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html