This patch includes changes in network core sub system for memory accounting.
Memory scheduling, charging, uncharging and reclaiming functions are added. These functions use sk_forward_alloc to store socket local accounting. They also need to use lock to keep consistency of sk_forward_alloc and memory_allocated. They currently support only datagram protocols. sk_datagram_rfree() is a receive buffer destructor for datagram protocols which are capable of protocol specific memory accounting. To enable memory accounting in releasing receive buffer, sock_queue_rcv_skb() is modified although the interface isn't changed. The body of the function is implemented in sock_queue_rcv_skb_with_owner(). Additionally, skb_set_owner_r() is moved from sock.h to core/datagram.c because we want to use it as a call back function. Cc: Satoshi Oshima <[EMAIL PROTECTED]> Signed-off-by: Takahiro Yasui <[EMAIL PROTECTED]> Signed-off-by: Masami Hiramatsu <[EMAIL PROTECTED]> Signed-off-by: Hideo Aoki <[EMAIL PROTECTED]> --- include/net/sock.h | 117 +++++++++++++++++++++++++++++++++++++++++++++++++--- net/core/datagram.c | 72 ++++++++++++++++++++++++++++++++ net/core/sock.c | 13 ++++- 3 files changed, 193 insertions(+), 9 deletions(-) diff -pruN net-2.6-udp-take10a4-p1/include/net/sock.h net-2.6-udp-take10a4-p2/include/net/sock.h --- net-2.6-udp-take10a4-p1/include/net/sock.h 2007-12-11 10:54:53.000000000 -0500 +++ net-2.6-udp-take10a4-p2/include/net/sock.h 2007-12-14 20:27:40.000000000 -0500 @@ -750,6 +750,9 @@ static inline struct inode *SOCK_INODE(s return &container_of(socket, struct socket_alloc, socket)->vfs_inode; } +/* + * Functions for memory accounting + */ extern void __sk_stream_mem_reclaim(struct sock *sk); extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind); @@ -778,6 +781,107 @@ static inline int sk_stream_wmem_schedul sk_stream_mem_schedule(sk, size, 0); } +extern void __sk_datagram_mem_reclaim(struct sock *sk); +extern int sk_datagram_mem_schedule(struct sock *sk, int size, int kind); + +#define SK_DATAGRAM_MEM_QUANTUM ((unsigned 
int)PAGE_SIZE) + +static inline int sk_datagram_pages(int amt) +{ + /* Cast to unsigned as an optimization, since amt is always positive. */ + return DIV_ROUND_UP((unsigned int)amt, SK_DATAGRAM_MEM_QUANTUM); +} + +extern void __sk_datagram_mem_reclaim(struct sock *sk); +extern int sk_datagram_mem_schedule(struct sock *sk, int size, int kind); + +static inline void sk_datagram_mem_reclaim(struct sock *sk) +{ + unsigned long flags; + + if (!sk->sk_prot->memory_allocated) + return; + + spin_lock_irqsave(&sk->sk_lock.slock, flags); + __sk_datagram_mem_reclaim(sk); + spin_unlock_irqrestore(&sk->sk_lock.slock, flags); +} + +static inline int sk_datagram_rmem_schedule(struct sock *sk, int size) +{ + return size <= sk->sk_forward_alloc || + sk_datagram_mem_schedule(sk, size, 1); +} + +static inline int sk_datagram_wmem_schedule(struct sock *sk, int size) +{ + return size <= sk->sk_forward_alloc || + sk_datagram_mem_schedule(sk, size, 0); +} + +static inline void sk_mem_reclaim(struct sock *sk) +{ + if (sk->sk_type == SOCK_DGRAM) + sk_datagram_mem_reclaim(sk); +} + +static inline int sk_wmem_schedule(struct sock *sk, int size) +{ + if (sk->sk_type == SOCK_DGRAM) + return sk_datagram_wmem_schedule(sk, size); + else + return 1; +} + +static inline int sk_account_wmem_charge(struct sock *sk, int size) +{ + unsigned long flags; + + /* account if protocol supports memory accounting. */ + if (!sk->sk_prot->memory_allocated || sk->sk_type != SOCK_DGRAM) + return 1; + + spin_lock_irqsave(&sk->sk_lock.slock, flags); + if (sk_datagram_wmem_schedule(sk, size)) { + sk->sk_forward_alloc -= size; + spin_unlock_irqrestore(&sk->sk_lock.slock, flags); + return 1; + } + spin_unlock_irqrestore(&sk->sk_lock.slock, flags); + return 0; +} + +static inline int sk_account_rmem_charge(struct sock *sk, int size) +{ + unsigned long flags; + + /* account if protocol supports memory accounting. 
*/ + if (!sk->sk_prot->memory_allocated || sk->sk_type != SOCK_DGRAM) + return 1; + + spin_lock_irqsave(&sk->sk_lock.slock, flags); + if (sk_datagram_rmem_schedule(sk, size)) { + sk->sk_forward_alloc -= size; + spin_unlock_irqrestore(&sk->sk_lock.slock, flags); + return 1; + } + spin_unlock_irqrestore(&sk->sk_lock.slock, flags); + return 0; +} + +static inline void sk_account_uncharge(struct sock *sk, int size) +{ + unsigned long flags; + + /* account if protocol supports memory accounting. */ + if (!sk->sk_prot->memory_allocated || sk->sk_type != SOCK_DGRAM) + return; + + spin_lock_irqsave(&sk->sk_lock.slock, flags); + sk->sk_forward_alloc += size; + spin_unlock_irqrestore(&sk->sk_lock.slock, flags); +} + /* Used by processes to "lock" a socket state, so that * interrupts and bottom half handlers won't change it * from under us. It essentially blocks any incoming @@ -1159,18 +1263,19 @@ static inline void skb_set_owner_w(struc atomic_add(skb->truesize, &sk->sk_wmem_alloc); } -static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) -{ - skb->sk = sk; - skb->destructor = sock_rfree; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); -} +extern void skb_set_owner_r(struct sk_buff *skb, struct sock *sk); + +void sk_datagram_rfree(struct sk_buff *skb); extern void sk_reset_timer(struct sock *sk, struct timer_list* timer, unsigned long expires); extern void sk_stop_timer(struct sock *sk, struct timer_list* timer); +extern int sock_queue_rcv_skb_with_owner(struct sock *sk, struct sk_buff *skb, + void set_owner_r(struct sk_buff *nskb, + struct sock* nsk)); + extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) diff -pruN net-2.6-udp-take10a4-p1/net/core/datagram.c net-2.6-udp-take10a4-p2/net/core/datagram.c --- net-2.6-udp-take10a4-p1/net/core/datagram.c 2007-12-11 10:54:55.000000000 -0500 +++ net-2.6-udp-take10a4-p2/net/core/datagram.c 2007-12-14 20:26:18.000000000 
-0500 @@ -200,6 +200,14 @@ void skb_free_datagram(struct sock *sk, kfree_skb(skb); } +void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) +{ + skb->sk = sk; + skb->destructor = sock_rfree; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); +} +EXPORT_SYMBOL(skb_set_owner_r); + /** * skb_kill_datagram - Free a datagram skbuff forcibly * @sk: socket @@ -484,6 +492,70 @@ fault: } /** + * sk_datagram_rfree - receive buffer destructor for datagram protocols + * @skb: skbuff + */ +void sk_datagram_rfree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + skb_truesize_check(skb); + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + sk_account_uncharge(sk, skb->truesize); + sk_datagram_mem_reclaim(sk); +} +EXPORT_SYMBOL(sk_datagram_rfree); + +/** + * __sk_datagram_mem_reclaim - memory reclaim for datagram protocols + * @sk: socket + */ +void __sk_datagram_mem_reclaim(struct sock *sk) +{ + if (sk->sk_forward_alloc < SK_DATAGRAM_MEM_QUANTUM) + return; + + atomic_sub(sk->sk_forward_alloc / SK_DATAGRAM_MEM_QUANTUM, + sk->sk_prot->memory_allocated); + sk->sk_forward_alloc &= SK_DATAGRAM_MEM_QUANTUM - 1; +} +EXPORT_SYMBOL(__sk_datagram_mem_reclaim); + +/** + * sk_datagram_mem_schedule - memory accounting for datagram protocols + * @sk: socket + * @size: memory size to allocate + * @kind: allocation type + * + * If kind is 0, it means wmem allocation. Otherwise it means rmem + * allocation. + */ +int sk_datagram_mem_schedule(struct sock *sk, int size, int kind) +{ + int amt; + struct proto *prot = sk->sk_prot; + + /* Don't account and limit memory if protocol doesn't support. */ + if (!prot->memory_allocated) + return 1; + + amt = sk_datagram_pages(size); + if (atomic_add_return(amt, prot->memory_allocated) > + prot->sysctl_mem[0]) + if ((kind && atomic_read(&sk->sk_rmem_alloc) + size >= + prot->sysctl_rmem[0]) || + (!kind && atomic_read(&sk->sk_wmem_alloc) + size >= + prot->sysctl_wmem[0])) { + /* Undo changes. 
*/ + atomic_sub(amt, prot->memory_allocated); + return 0; + } + sk->sk_forward_alloc += amt * SK_DATAGRAM_MEM_QUANTUM; + return 1; +} +EXPORT_SYMBOL(sk_datagram_mem_schedule); + +/** * datagram_poll - generic datagram poll * @file: file struct * @sock: socket diff -pruN net-2.6-udp-take10a4-p1/net/core/sock.c net-2.6-udp-take10a4-p2/net/core/sock.c --- net-2.6-udp-take10a4-p1/net/core/sock.c 2007-12-11 10:54:55.000000000 -0500 +++ net-2.6-udp-take10a4-p2/net/core/sock.c 2007-12-14 16:42:06.000000000 -0500 @@ -263,8 +263,9 @@ static void sock_disable_timestamp(struc } } - -int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int sock_queue_rcv_skb_with_owner(struct sock *sk, struct sk_buff *skb, + void set_owner_r(struct sk_buff *nskb, + struct sock* nsk)) { int err = 0; int skb_len; @@ -283,7 +284,7 @@ int sock_queue_rcv_skb(struct sock *sk, goto out; skb->dev = NULL; - skb_set_owner_r(skb, sk); + set_owner_r(skb, sk); /* Cache the SKB length before we tack it onto the receive * queue. Once it is added it no longer belongs to us and @@ -299,6 +300,12 @@ int sock_queue_rcv_skb(struct sock *sk, out: return err; } +EXPORT_SYMBOL(sock_queue_rcv_skb_with_owner); + +int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + return sock_queue_rcv_skb_with_owner(sk, skb, skb_set_owner_r); +} EXPORT_SYMBOL(sock_queue_rcv_skb); int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) -- Hitachi Computer Products (America) Inc. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html