TCP code already has internal memory management for both incoming and outgoing traffic. The outgoing packets are also already auto-accounted into kmem (and into cg memory); incoming traffic is not accounted into kmem. And this management is already per-cg thanks to Glauber's work some time ago.
So the TCP mm fix is to take the existing TCP mem accounting code and add/sub those numbers into cg memory. To avoid double accounting (via TCP hooks and via slub/buddy) the sk_allocation is set to be __GFP_NOACCOUNT. changes: * renamed memcg_(un)charge_kmem routines Signed-off-by: Pavel Emelyanov <xe...@parallels.com> --- include/linux/memcontrol.h | 2 ++ include/net/sock.h | 2 ++ mm/memcontrol.c | 25 +++++++++++++++++++++++-- net/ipv4/tcp.c | 5 +++++ net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 6 +++--- 6 files changed, 36 insertions(+), 6 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5507be5..7077044 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -470,6 +470,8 @@ struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr); int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size); void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size); +void memcg_charge_kmem_nofail(struct mem_cgroup *memcg, u64 size); +void __memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size); /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. 
diff --git a/include/net/sock.h b/include/net/sock.h index 0688f4e..2eb414f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1195,6 +1195,7 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot, struct res_counter *fail; int ret; + memcg_charge_kmem_nofail(prot->memcg, amt << PAGE_SHIFT); ret = res_counter_charge_nofail(prot->memory_allocated, amt << PAGE_SHIFT, &fail); if (ret < 0) @@ -1205,6 +1206,7 @@ static inline void memcg_memory_allocated_sub(struct cg_proto *prot, unsigned long amt) { res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT); + __memcg_uncharge_kmem(prot->memcg, amt << PAGE_SHIFT); } static inline u64 memcg_memory_allocated_read(struct cg_proto *prot) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9dda309..d38868c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -498,7 +498,6 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) /* Writing them here to avoid exposing memcg's inner layout */ #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) - void sock_update_memcg(struct sock *sk) { if (mem_cgroup_sockets_enabled) { @@ -3039,11 +3038,33 @@ int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size) return ret; } -void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size) +void memcg_charge_kmem_nofail(struct mem_cgroup *memcg, u64 size) { + struct res_counter *fail_res; + + /* + * FIXME -- strictly speaking, this value should _also_ + * be charged into kmem counter. But since res_counter_charge + * is sub-optimal (takes locks) AND we do not care much + * about kmem limits (at least for now) we can just directly + * charge into mem counter. 
+ */ + res_counter_charge_nofail(&memcg->res, size, &fail_res); + if (do_swap_account) + res_counter_uncharge(&memcg->memsw, size); +} + +void __memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size) +{ + /* FIXME -- uncharge also in kmem counter */ res_counter_uncharge(&memcg->res, size); if (do_swap_account) res_counter_uncharge(&memcg->memsw, size); +} + +void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size) +{ + __memcg_uncharge_kmem(memcg, size); /* Not down to 0 */ if (res_counter_uncharge(&memcg->kmem, size)) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e641406..8cbf0f5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -413,6 +413,11 @@ void tcp_init_sock(struct sock *sk) sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + /* + * TCP memory is accounted via cg_proto and there's + * no need in additional kmem charging via slub + */ + sk->sk_allocation |= __GFP_NOACCOUNT; icsk->icsk_sync_mss = tcp_sync_mss; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e0a231e..fa94a5a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4541,7 +4541,7 @@ restart: return; if (end - start < copy) copy = end - start; - nskb = alloc_skb(copy + header, GFP_ATOMIC); + nskb = alloc_skb(copy + header, GFP_ATOMIC|__GFP_NOACCOUNT); if (!nskb) return; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 13d440b..a217305 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1061,7 +1061,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, return -ENOMEM; /* Get a new skb... force flag on. */ - buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC); + buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC|__GFP_NOACCOUNT); if (buff == NULL) return -ENOMEM; /* We'll just try again later. 
*/ @@ -1548,7 +1548,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, if (skb->len != skb->data_len) return tcp_fragment(sk, skb, len, mss_now); - buff = sk_stream_alloc_skb(sk, 0, gfp); + buff = sk_stream_alloc_skb(sk, 0, gfp|__GFP_NOACCOUNT); if (unlikely(buff == NULL)) return -ENOMEM; @@ -1718,7 +1718,7 @@ static int tcp_mtu_probe(struct sock *sk) } /* We're allowed to probe. Build it now. */ - if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL) + if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC|__GFP_NOACCOUNT)) == NULL) return -1; sk->sk_wmem_queued += nskb->truesize; sk_mem_charge(sk, nskb->truesize); -- 1.8.3.1 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel