>> @@ -496,9 +496,28 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup 
>> *memcg)
>>      return (memcg == root_mem_cgroup);
>>  }
>>  
>> +void memcg_charge_kmem_bytes(struct mem_cgroup *cg, unsigned long bytes)
> 
> Please name this function memcg_charge_kmem_nofail. And use memcg for
> instances of mem_cgroup, please. The code is already messy, we'd better
> avoid making it even more inconsistent.

:D   OK

>> +{
>> +    struct res_counter *fail_res;
>> +
>> +    /*
>> +     * FIXME -- strictly speaking, this value should _also_
>> +     * be charged into kmem counter. But since res_counter_charge
>> +     * is sub-optimal (takes locks) AND we do not care much
>> +     * about kmem limits (at least for now) we can just directly
>> +     * charge into mem counter.
>> +     */
>> +    res_counter_charge_nofail(&cg->res, bytes, &fail_res);
> 
> You must also charge cg->memsw if do_swap_account is true.

O_o  Hm, indeed. Charging each counter takes time due to locks. Do you
plan to ... fix this?

> And I don't think charging kmem will make any difference. OTOH not
> charging it may result in mem_cgroup_reparent_charges looping forever.
> 
>> +}
>> +
>> +void memcg_uncharge_kmem_bytes(struct mem_cgroup *cg, unsigned long bytes)
>> +{
>> +    /* FIXME -- uncharge also in kmem counter */
>> +    res_counter_uncharge(&cg->res, bytes);
> 
> You don't check if css reference should be dropped. Please use
> memcg_uncharge_kmem, which does.

By the time the network counter hits zero, the cgroup is still held by the
socket itself, so what's the point in it?

>> +}
>> +
>>  /* Writing them here to avoid exposing memcg's inner layout */
>>  #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
>> -
> 
> Don't add extra hunks where it is not necessary, please.

:P

>>  void sock_update_memcg(struct sock *sk)
>>  {
>>      if (mem_cgroup_sockets_enabled) {
>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
>> index e641406..8cbf0f5 100644
>> --- a/net/ipv4/tcp.c
>> +++ b/net/ipv4/tcp.c
>> @@ -413,6 +413,11 @@ void tcp_init_sock(struct sock *sk)
>>  
>>      sk->sk_write_space = sk_stream_write_space;
>>      sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
>> +    /*
>> +     * TCP memory is accounted via cg_proto and there's
>> +     * no need in additional kmem charging via slub
>> +     */
>> +    sk->sk_allocation |= __GFP_NOACCOUNT;
>>  
>>      icsk->icsk_sync_mss = tcp_sync_mss;
>>  
>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>> index e0a231e..fa94a5a 100644
>> --- a/net/ipv4/tcp_input.c
>> +++ b/net/ipv4/tcp_input.c
>> @@ -4541,7 +4541,7 @@ restart:
>>                      return;
>>              if (end - start < copy)
>>                      copy = end - start;
>> -            nskb = alloc_skb(copy + header, GFP_ATOMIC);
>> +            nskb = alloc_skb(copy + header, GFP_ATOMIC|__GFP_NOACCOUNT);
>>              if (!nskb)
>>                      return;
>>  
>> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
>> index 13d440b..a217305 100644
>> --- a/net/ipv4/tcp_output.c
>> +++ b/net/ipv4/tcp_output.c
>> @@ -1061,7 +1061,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, 
>> u32 len,
>>              return -ENOMEM;
>>  
>>      /* Get a new skb... force flag on. */
>> -    buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
>> +    buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC|__GFP_NOACCOUNT);
>>      if (buff == NULL)
>>              return -ENOMEM; /* We'll just try again later. */
>>  
>> @@ -1548,7 +1548,7 @@ static int tso_fragment(struct sock *sk, struct 
>> sk_buff *skb, unsigned int len,
>>      if (skb->len != skb->data_len)
>>              return tcp_fragment(sk, skb, len, mss_now);
>>  
>> -    buff = sk_stream_alloc_skb(sk, 0, gfp);
>> +    buff = sk_stream_alloc_skb(sk, 0, gfp|__GFP_NOACCOUNT);
>>      if (unlikely(buff == NULL))
>>              return -ENOMEM;
>>  
>> @@ -1718,7 +1718,7 @@ static int tcp_mtu_probe(struct sock *sk)
>>      }
>>  
>>      /* We're allowed to probe.  Build it now. */
>> -    if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
>> +    if ((nskb = sk_stream_alloc_skb(sk, probe_size, 
>> GFP_ATOMIC|__GFP_NOACCOUNT)) == NULL)
>>              return -1;
>>      sk->sk_wmem_queued += nskb->truesize;
>>      sk_mem_charge(sk, nskb->truesize);
> 
> What about tcp_send_ack, tcp_xmit_probe_skb, tcp_send_active_reset?

Yes, that's messy. The allocations I marked are (or should be) "accounted" by the
TCP mm (see the sk_mem_charge calls after them), so this __GFP_NOACCOUNT removes
double charging (via TCP mm AND slub mm). The ones you point out are not covered
by TCP mm and (to be 100% correct) should be accounted into physpages via kmem.
OTOH the places you mention allocate short-lived skbs that disappear in a couple
of ticks, so even if we mark them as __GFP_NOACCOUNT we'd only make them a bit
faster, with no real effect on accounting ...

-- Pavel
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to