Re: [PATCH] tcp: Support tcp socket allocated counter in namespace.

2018-02-28 Thread Stephen Hemminger
On Mon, 12 Feb 2018 18:44:00 -0800
Tonghao Zhang  wrote:

> Sometimes, we want to know how many tcp sockets are in use
> different _net_ namespaces. It's a key resource metric. With
> this patch, we can get it via /proc/net/sockstat.
> 
> The 'alloc' show in /proc/net/sockstat is the total tcp
> sockets in the kernel. This patch moves it to namespace,
> via adding a counter because the previous counter is used
> in proto(e.g tcp, udp and sctp) memory management.
> 
> Signed-off-by: Tonghao Zhang 
> ---
>  include/net/netns/ipv4.h |  3 +++
>  include/net/tcp.h|  2 ++
>  net/ipv4/proc.c  |  2 +-
>  net/ipv4/tcp.c   |  2 ++
>  net/ipv4/tcp_ipv4.c  | 34 ++
>  net/ipv4/tcp_minisocks.c |  3 +++
>  6 files changed, 45 insertions(+), 1 deletion(-)
> 
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 44668c2..85e91bd 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -68,6 +68,9 @@ struct netns_ipv4 {
>  
>   struct inet_peer_base   *peers;
>   struct sock  * __percpu *tcp_sk;
> +#ifdef CONFIG_PROC_FS
> + int __percpu *tcp_sock_allocated;
> +#endif
>   struct netns_frags  frags;
>  #ifdef CONFIG_NETFILTER
>   struct xt_table *iptable_filter;
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 093e967..4b24b6e 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1780,6 +1780,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
>  int tcp_gro_complete(struct sk_buff *skb);
>  
>  void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
> +void tcp_sock_allocated_add(struct net *net, int val);
> +int tcp_sock_allocated_get(struct net *net);
>  
>  static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
>  {
> diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
> index dc5edc8..8a147f7 100644
> --- a/net/ipv4/proc.c
> +++ b/net/ipv4/proc.c
> @@ -58,7 +58,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
>   int orphans, sockets;
>  
>   orphans = percpu_counter_sum_positive(_orphan_count);
> - sockets = proto_sockets_allocated_sum_positive(_prot);
> + sockets = tcp_sock_allocated_get(net);
>  
>   socket_seq_show(seq);
>   seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 874c931..77fe4a5 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -453,6 +453,8 @@ void tcp_init_sock(struct sock *sk)
>   sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
>  
>   sk_sockets_allocated_inc(sk);
> + if (likely(sk->sk_net_refcnt))
> + tcp_sock_allocated_add(sock_net(sk), 1);
>  }
>  EXPORT_SYMBOL(tcp_init_sock);
>  
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 95738aa..a7bd0c4 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1928,6 +1928,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
>   tcp_saved_syn_free(tp);
>  
>   sk_sockets_allocated_dec(sk);
> + if (likely(sk->sk_net_refcnt))
> + tcp_sock_allocated_add(sock_net(sk), -1);
>  }
>  EXPORT_SYMBOL(tcp_v4_destroy_sock);
>  
> @@ -2446,6 +2448,28 @@ struct proto tcp_prot = {
>  };
>  EXPORT_SYMBOL(tcp_prot);
>  
> +void tcp_sock_allocated_add(struct net *net, int val)
> +{
> +#ifdef CONFIG_PROC_FS
> + this_cpu_add(*net->ipv4.tcp_sock_allocated, val);
> +#endif
> +}
> +EXPORT_SYMBOL(tcp_sock_allocated_add);
> +
> +int tcp_sock_allocated_get(struct net *net)
> +{
> +#ifdef CONFIG_PROC_FS
> + int cpu, res = 0;
> +
> + for_each_possible_cpu(cpu)
> + res += *per_cpu_ptr(net->ipv4.tcp_sock_allocated, cpu);
> + return res;
> +#else
> + return 0;
> +#endif
> +}
> +EXPORT_SYMBOL(tcp_sock_allocated_get);
> +
>  static void __net_exit tcp_sk_exit(struct net *net)
>  {
>   int cpu;
> @@ -2455,6 +2479,10 @@ static void __net_exit tcp_sk_exit(struct net *net)
>   for_each_possible_cpu(cpu)
>   inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
>   free_percpu(net->ipv4.tcp_sk);
> +
> +#ifdef CONFIG_PROC_FS
> + free_percpu(net->ipv4.tcp_sock_allocated);
> +#endif
>  }
>  
>  static int __net_init tcp_sk_init(struct net *net)
> @@ -2465,6 +2493,12 @@ static int __net_init tcp_sk_init(struct net *net)
>   if (!net->ipv4.tcp_sk)
>   return -ENOMEM;
>  
> +#ifdef CONFIG_PROC_FS
> + net->ipv4.tcp_sock_allocated = alloc_percpu(int);
> + if (!net->ipv4.tcp_sock_allocated)
> + goto fail;
> +#endif
> +
>   for_each_possible_cpu(cpu) {
>   struct sock *sk;
>  
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index a8384b0c..573fe43 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -559,6 +559,9 @@ struct sock *tcp_create_openreq_child(const struct sock 
> *sk,
>   

Re: [PATCH] tcp: Support tcp socket allocated counter in namespace.

2018-02-28 Thread Tonghao Zhang
On Wed, Feb 28, 2018 at 10:41 PM, David Miller  wrote:
> From: Tonghao Zhang 
> Date: Wed, 28 Feb 2018 21:01:52 +0800
>
>>> The amount of new conditional tests in these fast paths are not
>>> justified for this new counter which is of debatable usefullness.
>> sorry, too late for reply. Did you mean this counter will affect performance 
>> ?
>> I tested the patch witch netperf.
>
> A single flow TCP session with no loss doesn't tell us much.
>
> I just know from how these things go that under load, and with
> all kinds of different flows with different characterstics,
> every conditional test in the fast paths matter.
>
> If you want to have a chance at your change getting accepted,
> running all kinds of benchmarks won't do it.
>
> Instead, please find a way to make your feature work without all of
> the newly added tests (whilst not adding another cost at the same
> time).
Thanks for your explanation.

> Thank you.


Re: [PATCH] tcp: Support tcp socket allocated counter in namespace.

2018-02-28 Thread David Miller
From: Tonghao Zhang 
Date: Wed, 28 Feb 2018 21:01:52 +0800

>> The amount of new conditional tests in these fast paths are not
>> justified for this new counter which is of debatable usefullness.
> sorry, too late for reply. Did you mean this counter will affect performance ?
> I tested the patch witch netperf.

A single flow TCP session with no loss doesn't tell us much.

I just know from how these things go that under load, and with
all kinds of different flows with different characterstics,
every conditional test in the fast paths matter.

If you want to have a chance at your change getting accepted,
running all kinds of benchmarks won't do it.

Instead, please find a way to make your feature work without all of
the newly added tests (whilst not adding another cost at the same
time).

Thank you.


Re: [PATCH] tcp: Support tcp socket allocated counter in namespace.

2018-02-28 Thread Tonghao Zhang
On Wed, Feb 14, 2018 at 1:19 AM, David Miller  wrote:
> From: Tonghao Zhang 
> Date: Mon, 12 Feb 2018 18:44:00 -0800
>
>> Sometimes, we want to know how many tcp sockets are in use
>> different _net_ namespaces. It's a key resource metric. With
>> this patch, we can get it via /proc/net/sockstat.
>>
>> The 'alloc' show in /proc/net/sockstat is the total tcp
>> sockets in the kernel. This patch moves it to namespace,
>> via adding a counter because the previous counter is used
>> in proto(e.g tcp, udp and sctp) memory management.
>>
>> Signed-off-by: Tonghao Zhang 
>  ...
>> @@ -453,6 +453,8 @@ void tcp_init_sock(struct sock *sk)
>>   sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
>>
>>   sk_sockets_allocated_inc(sk);
>> + if (likely(sk->sk_net_refcnt))
>> + tcp_sock_allocated_add(sock_net(sk), 1);
>>  }
>>  EXPORT_SYMBOL(tcp_init_sock);
>>
>  ...
>> @@ -1928,6 +1928,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
>>   tcp_saved_syn_free(tp);
>>
>>   sk_sockets_allocated_dec(sk);
>> + if (likely(sk->sk_net_refcnt))
>> + tcp_sock_allocated_add(sock_net(sk), -1);
>>  }
>>  EXPORT_SYMBOL(tcp_v4_destroy_sock);
>>
>  ...
>> @@ -559,6 +559,9 @@ struct sock *tcp_create_openreq_child(const struct sock 
>> *sk,
>>   newtp->rack.reo_wnd_persist = 0;
>>   newtp->rack.dsack_seen = 0;
>>
>> + if (likely(newsk->sk_net_refcnt))
>> + tcp_sock_allocated_add(sock_net(newsk), 1);
>> +
>>   __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
>
> The amount of new conditional tests in these fast paths are not
> justified for this new counter which is of debatable usefullness.
sorry, too late for reply. Did you mean this counter will affect performance ?
I tested the patch witch netperf.

Before patch:
# netperf -H 192.168.3.5 -t TCP_CRR -l 60 -- -O "THROUGHPUT,
THROUGHPUT_UNITS, MIN_LATENCY, MAX_LATENCY, MEAN_LATENCY"
MIGRATED TCP Connect/Request/Response TEST from 0.0.0.0 (0.0.0.0) port
0 AF_INET to 192.168.3.5 () port 0 AF_INET
Throughput Throughput Minimum  Maximum  Mean
   Units  Latency  Latency  Latency
  Microseconds Microseconds Microseconds
7719.86Trans/s70   660  129.32


# netperf -H 192.168.3.5 -t TCP_CRR -l 60 -- -O "THROUGHPUT,
THROUGHPUT_UNITS, MIN_LATENCY, MAX_LATENCY, MEAN_LATENCY"
MIGRATED TCP Connect/Request/Response TEST from 0.0.0.0 (0.0.0.0) port
0 AF_INET to 192.168.3.5 () port 0 AF_INET
Throughput Throughput Minimum  Maximum  Mean
   Units  Latency  Latency  Latency
  Microseconds Microseconds Microseconds
7574.00Trans/s86   13016131.82


# netperf -H 192.168.3.5 -t TCP_CRR -l 60 -- -O "THROUGHPUT,
THROUGHPUT_UNITS, MIN_LATENCY, MAX_LATENCY, MEAN_LATENCY"
MIGRATED TCP Connect/Request/Response TEST from 0.0.0.0 (0.0.0.0) port
0 AF_INET to 192.168.3.5 () port 0 AF_INET
Throughput Throughput Minimum  Maximum  Mean
   Units  Latency  Latency  Latency
  Microseconds Microseconds Microseconds
7556.66Trans/s87   15152132.06


After patch:

# netperf -H 192.168.3.5 -t TCP_CRR -l 60 -- -O "THROUGHPUT,
THROUGHPUT_UNITS, MIN_LATENCY, MAX_LATENCY, MEAN_LATENCY"
MIGRATED TCP Connect/Request/Response TEST from 0.0.0.0 (0.0.0.0) port
0 AF_INET to 192.168.3.5 () port 0 AF_INET
Throughput Throughput Minimum  Maximum  Mean
   Units  Latency  Latency  Latency
  Microseconds Microseconds Microseconds
7277.86Trans/s86   14884137.19



# netperf -H 192.168.3.5 -t TCP_CRR -l 60 -- -O "THROUGHPUT,
THROUGHPUT_UNITS, MIN_LATENCY, MAX_LATENCY, MEAN_LATENCY"
MIGRATED TCP Connect/Request/Response TEST from 0.0.0.0 (0.0.0.0) port
0 AF_INET to 192.168.3.5 () port 0 AF_INET
Throughput Throughput Minimum  Maximum  Mean
   Units  Latency  Latency  Latency
  Microseconds Microseconds Microseconds
7534.73Trans/s92   7531 132.50

# netperf -H 192.168.3.5 -t TCP_CRR -l 60 -- -O "THROUGHPUT,
THROUGHPUT_UNITS, MIN_LATENCY, MAX_LATENCY, MEAN_LATENCY"
MIGRATED TCP Connect/Request/Response TEST from 0.0.0.0 (0.0.0.0) port
0 AF_INET to 192.168.3.5 () port 0 AF_INET
Throughput Throughput Minimum  Maximum  Mean
   Units  Latency  Latency  Latency
  Microseconds Microseconds Microseconds
7622.17Trans/s89   2370 130.98


> I'm not applying this, sorry.


Re: [PATCH] tcp: Support tcp socket allocated counter in namespace.

2018-02-13 Thread David Miller
From: Tonghao Zhang 
Date: Mon, 12 Feb 2018 18:44:00 -0800

> Sometimes, we want to know how many tcp sockets are in use
> different _net_ namespaces. It's a key resource metric. With
> this patch, we can get it via /proc/net/sockstat.
> 
> The 'alloc' show in /proc/net/sockstat is the total tcp
> sockets in the kernel. This patch moves it to namespace,
> via adding a counter because the previous counter is used
> in proto(e.g tcp, udp and sctp) memory management.
> 
> Signed-off-by: Tonghao Zhang 
 ...
> @@ -453,6 +453,8 @@ void tcp_init_sock(struct sock *sk)
>   sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
>  
>   sk_sockets_allocated_inc(sk);
> + if (likely(sk->sk_net_refcnt))
> + tcp_sock_allocated_add(sock_net(sk), 1);
>  }
>  EXPORT_SYMBOL(tcp_init_sock);
>  
 ...
> @@ -1928,6 +1928,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
>   tcp_saved_syn_free(tp);
>  
>   sk_sockets_allocated_dec(sk);
> + if (likely(sk->sk_net_refcnt))
> + tcp_sock_allocated_add(sock_net(sk), -1);
>  }
>  EXPORT_SYMBOL(tcp_v4_destroy_sock);
>  
 ...
> @@ -559,6 +559,9 @@ struct sock *tcp_create_openreq_child(const struct sock 
> *sk,
>   newtp->rack.reo_wnd_persist = 0;
>   newtp->rack.dsack_seen = 0;
>  
> + if (likely(newsk->sk_net_refcnt))
> + tcp_sock_allocated_add(sock_net(newsk), 1);
> +
>   __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);

The amount of new conditional tests in these fast paths are not
justified for this new counter which is of debatable usefullness.

I'm not applying this, sorry.