The TCP code already has internal memory management for both incoming
and outgoing traffic. Outgoing packets are also already automatically
accounted into kmem (and into cg memory), while incoming traffic is
not accounted into kmem. This management is already per-cg, thanks
to Glauber's work some time ago.

So the TCP memory-management fix is: take the existing TCP memory
accounting code and add/subtract those numbers into cg memory. To
avoid double accounting (via TCP hooks and via slub/buddy), the
__GFP_NOACCOUNT flag is added to sk_allocation.

Signed-off-by: Pavel Emelyanov <[email protected]>

---
 include/linux/memcontrol.h |  3 +++
 include/net/sock.h         |  2 ++
 mm/memcontrol.c            | 21 ++++++++++++++++++++-
 net/ipv4/tcp.c             |  5 +++++
 net/ipv4/tcp_input.c       |  2 +-
 net/ipv4/tcp_output.c      |  6 +++---
 6 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5507be5..d250663 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -421,6 +421,9 @@ static inline void sock_release_memcg(struct sock *sk)
 }
 #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
 
+void memcg_charge_kmem_bytes(struct mem_cgroup *mcg, unsigned long bytes);
+void memcg_uncharge_kmem_bytes(struct mem_cgroup *mcg, unsigned long bytes);
+
 #ifdef CONFIG_MEMCG_KMEM
 extern struct static_key memcg_kmem_enabled_key;
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 0688f4e..6adab3c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1195,6 +1195,7 @@ static inline void memcg_memory_allocated_add(struct 
cg_proto *prot,
        struct res_counter *fail;
        int ret;
 
+       memcg_charge_kmem_bytes(prot->memcg, amt << PAGE_SHIFT);
        ret = res_counter_charge_nofail(prot->memory_allocated,
                                        amt << PAGE_SHIFT, &fail);
        if (ret < 0)
@@ -1205,6 +1206,7 @@ static inline void memcg_memory_allocated_sub(struct 
cg_proto *prot,
                                              unsigned long amt)
 {
        res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT);
+       memcg_uncharge_kmem_bytes(prot->memcg, amt << PAGE_SHIFT);
 }
 
 static inline u64 memcg_memory_allocated_read(struct cg_proto *prot)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9dda309..d8c305f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -496,9 +496,28 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup 
*memcg)
        return (memcg == root_mem_cgroup);
 }
 
+void memcg_charge_kmem_bytes(struct mem_cgroup *cg, unsigned long bytes)
+{
+       struct res_counter *fail_res;
+
+       /*
+        * FIXME -- strictly speaking, this value should _also_
+        * be charged into kmem counter. But since res_counter_charge
+        * is sub-optimal (takes locks) AND we do not care much
+        * about kmem limits (at least for now) we can just directly
+        * charge into mem counter.
+        */
+       res_counter_charge_nofail(&cg->res, bytes, &fail_res);
+}
+
+void memcg_uncharge_kmem_bytes(struct mem_cgroup *cg, unsigned long bytes)
+{
+       /* FIXME -- uncharge also in kmem counter */
+       res_counter_uncharge(&cg->res, bytes);
+}
+
 /* Writing them here to avoid exposing memcg's inner layout */
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
-
 void sock_update_memcg(struct sock *sk)
 {
        if (mem_cgroup_sockets_enabled) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e641406..8cbf0f5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -413,6 +413,11 @@ void tcp_init_sock(struct sock *sk)
 
        sk->sk_write_space = sk_stream_write_space;
        sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+       /*
+        * TCP memory is accounted via cg_proto and there's
+        * no need for additional kmem charging via slub
+        */
+       sk->sk_allocation |= __GFP_NOACCOUNT;
 
        icsk->icsk_sync_mss = tcp_sync_mss;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e0a231e..fa94a5a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4541,7 +4541,7 @@ restart:
                        return;
                if (end - start < copy)
                        copy = end - start;
-               nskb = alloc_skb(copy + header, GFP_ATOMIC);
+               nskb = alloc_skb(copy + header, GFP_ATOMIC|__GFP_NOACCOUNT);
                if (!nskb)
                        return;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 13d440b..a217305 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1061,7 +1061,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, 
u32 len,
                return -ENOMEM;
 
        /* Get a new skb... force flag on. */
-       buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
+       buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC|__GFP_NOACCOUNT);
        if (buff == NULL)
                return -ENOMEM; /* We'll just try again later. */
 
@@ -1548,7 +1548,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff 
*skb, unsigned int len,
        if (skb->len != skb->data_len)
                return tcp_fragment(sk, skb, len, mss_now);
 
-       buff = sk_stream_alloc_skb(sk, 0, gfp);
+       buff = sk_stream_alloc_skb(sk, 0, gfp|__GFP_NOACCOUNT);
        if (unlikely(buff == NULL))
                return -ENOMEM;
 
@@ -1718,7 +1718,7 @@ static int tcp_mtu_probe(struct sock *sk)
        }
 
        /* We're allowed to probe.  Build it now. */
-       if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
+       if ((nskb = sk_stream_alloc_skb(sk, probe_size, 
GFP_ATOMIC|__GFP_NOACCOUNT)) == NULL)
                return -1;
        sk->sk_wmem_queued += nskb->truesize;
        sk_mem_charge(sk, nskb->truesize);
-- 
1.8.3.1


_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to