Re: [PATCH net-next] Per interface IPv4 stats (CONFIG_IP_IFSTATS_TABLE)
Hello, On Thu, 12 Apr 2018, Stephen Suryaputra wrote: > Thanks for the feedbacks. Please see the detail below: > > On Wed, Apr 11, 2018 at 3:37 PM, Julian Anastasovwrote: > [snip] > >> - __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); > >> + __IP_INC_STATS(net, skb_dst(skb)->dev, IPSTATS_MIB_INHDRERRORS); > > > > May be skb->dev if we want to account it to the > > input device. > > > Yes. I'm about to make change it but see the next one. > > [snip] > >> diff --git a/net/netfilter/ipvs/ip_vs_xmit.c > >> b/net/netfilter/ipvs/ip_vs_xmit.c > >> index 4527921..32bd3af 100644 > >> --- a/net/netfilter/ipvs/ip_vs_xmit.c > >> +++ b/net/netfilter/ipvs/ip_vs_xmit.c > >> @@ -286,7 +286,7 @@ static inline bool decrement_ttl(struct netns_ipvs > >> *ipvs, > >> { > >> if (ip_hdr(skb)->ttl <= 1) { > >> /* Tell the sender its packet died... */ > >> - __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); > >> + __IP_INC_STATS(net, skb_dst(skb)->dev, > >> IPSTATS_MIB_INHDRERRORS); > > > > At this point, skb_dst(skb) can be: > > > > - input route at LOCAL_IN => dst->dev is "lo", skb->dev = input_device > > - output route at LOCAL_OUT => dst->dev is output_device, skb->dev = NULL > > > > We should see this error on LOCAL_IN but better to be > > safe: use 'skb->dev ? : skb_dst(skb)->dev' instead of just > > 'skb_dst(skb)->dev'. > > > This follows v6 implementation in the same function: > > #ifdef CONFIG_IP_VS_IPV6 > if (skb_af == AF_INET6) { > struct dst_entry *dst = skb_dst(skb); > > /* check and decrement ttl */ > if (ipv6_hdr(skb)->hop_limit <= 1) { > /* Force OUTPUT device used as source address */ It looks like IPVS copied it from ip6_forward() but in IPVS context it has its reason: we want ICMP to exit with saddr=Virtual_IP. And we are at LOCAL_IN where there is no output device like in ip6_forward(FORWARD) to use its source address. So, IPVS is special (both input and output path) and needs: IPv4: skb->dev ? : skb_dst(skb)->dev IPv6 needs fix for IPVS stats in decrement_ttl: idev = skb->dev ? __in6_dev_get(skb->dev) : ip6_dst_idev(dst); ... __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); Otherwise, stats will go to "lo" if ip6_dst_idev is used for local input route. So, for accounting on input IPv4 path skb->dev should be used, while for IPv6 some sites may prefer to feed icmpv6_send() with output dst->dev as device containing the source address (skb->dev). But this is unrelated to the stats. > skb->dev = dst->dev; > icmpv6_send(skb, ICMPV6_TIME_EXCEED, > ICMPV6_EXC_HOPLIMIT, 0); > __IP6_INC_STATS(net, ip6_dst_idev(dst), > IPSTATS_MIB_INHDRERRORS); > > return false; > } > > /* don't propagate ttl change to cloned packets */ > if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) > return false; > > ipv6_hdr(skb)->hop_limit--; > } else > #endif > > [snip] > > > > The patch probably has other errors, for example, > > using rt->dst.dev (lo) when rt->dst.error != 0 in ip_error, > > may be 'dev' should be used instead... > > Same also here. Examples are ip6_forward and ip6_pkt_drop. > > I think it's better be counted in the input device for them also. Thoughts? I think so. ipv6_rcv() works with idev = __in6_dev_get(skb->dev) but I don't know IPv6 well and whether ip6_dst_idev(skb_dst(skb)) is correct usage for input path. It should be correct for output path, though. Regards -- Julian Anastasov
Re: [PATCH net-next] Per interface IPv4 stats (CONFIG_IP_IFSTATS_TABLE)
Thanks for the feedbacks. Please see the detail below: On Wed, Apr 11, 2018 at 3:37 PM, Julian Anastasovwrote: [snip] >> - __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); >> + __IP_INC_STATS(net, skb_dst(skb)->dev, IPSTATS_MIB_INHDRERRORS); > > May be skb->dev if we want to account it to the > input device. > Yes. I'm about to make change it but see the next one. [snip] >> diff --git a/net/netfilter/ipvs/ip_vs_xmit.c >> b/net/netfilter/ipvs/ip_vs_xmit.c >> index 4527921..32bd3af 100644 >> --- a/net/netfilter/ipvs/ip_vs_xmit.c >> +++ b/net/netfilter/ipvs/ip_vs_xmit.c >> @@ -286,7 +286,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, >> { >> if (ip_hdr(skb)->ttl <= 1) { >> /* Tell the sender its packet died... */ >> - __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); >> + __IP_INC_STATS(net, skb_dst(skb)->dev, >> IPSTATS_MIB_INHDRERRORS); > > At this point, skb_dst(skb) can be: > > - input route at LOCAL_IN => dst->dev is "lo", skb->dev = input_device > - output route at LOCAL_OUT => dst->dev is output_device, skb->dev = NULL > > We should see this error on LOCAL_IN but better to be > safe: use 'skb->dev ? : skb_dst(skb)->dev' instead of just > 'skb_dst(skb)->dev'. > This follows v6 implementation in the same function: #ifdef CONFIG_IP_VS_IPV6 if (skb_af == AF_INET6) { struct dst_entry *dst = skb_dst(skb); /* check and decrement ttl */ if (ipv6_hdr(skb)->hop_limit <= 1) { /* Force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); return false; } /* don't propagate ttl change to cloned packets */ if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) return false; ipv6_hdr(skb)->hop_limit--; } else #endif [snip] > > The patch probably has other errors, for example, > using rt->dst.dev (lo) when rt->dst.error != 0 in ip_error, > may be 'dev' should be used instead... Same also here. Examples are ip6_forward and ip6_pkt_drop. I think it's better be counted in the input device for them also. Thoughts? Regards, Stephen.
Re: [PATCH net-next] Per interface IPv4 stats (CONFIG_IP_IFSTATS_TABLE)
Hi Stephen, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on net-next/master] url: https://github.com/0day-ci/linux/commits/Stephen-Suryaputra/Per-interface-IPv4-stats-CONFIG_IP_IFSTATS_TABLE/20180412-181719 reproduce: # apt-get install sparse make ARCH=x86_64 allmodconfig make C=1 CF=-D__CHECK_ENDIAN__ sparse warnings: (new ones prefixed by >>) net/ipv4/proc.c:414:28: sparse: Variable length array is used. >> net/ipv4/proc.c:499:43: sparse: incorrect type in argument 1 (different >> address spaces) @@expected void [noderef] *mib @@got vvoid >> [noderef] *mib @@ net/ipv4/proc.c:499:43:expected void [noderef] *mib net/ipv4/proc.c:499:43:got void [noderef] **pcpumib >> net/ipv4/proc.c:532:34: sparse: cast removes address space of expression net/ipv4/proc.c:534:34: sparse: cast removes address space of expression vim +499 net/ipv4/proc.c 411 412 static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) 413 { > 414 unsigned long buff[TCPUDP_MIB_MAX]; 415 struct net *net = seq->private; 416 int i; 417 418 memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); 419 420 seq_puts(seq, "\nTcp:"); 421 for (i = 0; snmp4_tcp_list[i].name; i++) 422 seq_printf(seq, " %s", snmp4_tcp_list[i].name); 423 424 seq_puts(seq, "\nTcp:"); 425 snmp_get_cpu_field_batch(buff, snmp4_tcp_list, 426 net->mib.tcp_statistics); 427 for (i = 0; snmp4_tcp_list[i].name; i++) { 428 /* MaxConn field is signed, RFC 2012 */ 429 if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) 430 seq_printf(seq, " %ld", buff[i]); 431 else 432 seq_printf(seq, " %lu", buff[i]); 433 } 434 435 memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); 436 437 snmp_get_cpu_field_batch(buff, snmp4_udp_list, 438 net->mib.udp_statistics); 439 seq_puts(seq, "\nUdp:"); 440 for (i = 0; snmp4_udp_list[i].name; i++) 441 seq_printf(seq, " %s", snmp4_udp_list[i].name); 442 seq_puts(seq, "\nUdp:"); 443 for (i = 0; snmp4_udp_list[i].name; i++) 444 seq_printf(seq, " %lu", buff[i]); 445 446 memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); 447 448 /* the UDP and UDP-Lite MIBs are the same */ 449 seq_puts(seq, "\nUdpLite:"); 450 snmp_get_cpu_field_batch(buff, snmp4_udp_list, 451 net->mib.udplite_statistics); 452 for (i = 0; snmp4_udp_list[i].name; i++) 453 seq_printf(seq, " %s", snmp4_udp_list[i].name); 454 seq_puts(seq, "\nUdpLite:"); 455 for (i = 0; snmp4_udp_list[i].name; i++) 456 seq_printf(seq, " %lu", buff[i]); 457 458 seq_putc(seq, '\n'); 459 return 0; 460 } 461 462 static int snmp_seq_show(struct seq_file *seq, void *v) 463 { 464 snmp_seq_show_ipstats(seq, v); 465 466 icmp_put(seq); /* RFC 2011 compatibility */ 467 icmpmsg_put(seq); 468 469 snmp_seq_show_tcp_udp(seq, v); 470 471 return 0; 472 } 473 474 static int snmp_seq_open(struct inode *inode, struct file *file) 475 { 476 return single_open_net(inode, file, snmp_seq_show); 477 } 478 479 static const struct file_operations snmp_seq_fops = { 480 .open= snmp_seq_open, 481 .read= seq_read, 482 .llseek = seq_lseek, 483 .release = single_release_net, 484 }; 485 486 487 #ifdef CONFIG_IP_IFSTATS_TABLE 488 static void snmp_seq_show_item(struct seq_file *seq, void __percpu **pcpumib, 489 atomic_long_t *smib, 490 const struct snmp_mib *itemlist, 491 char *prefix) 492 { 493 char name[32]; 494 int i; 495 unsigned long val; 496 497 for (i = 0; itemlist[i].name; i++) { 498 val = pcpumib ? > 499 snmp_fold_field64(pcpumib, itemlist[i].entry, 500offsetof(struct ipstats_mib, syncp)) : 501 atomic_long_read(smib + itemlist[i].entry); 502 snprintf(name, sizeof(name), "%s%s", 503 prefix, itemlist[i].name); 504 seq_printf(seq, "%-32s\t%lu\n", name, val); 505 } 506 } 507 508 static void
Re: [PATCH net-next] Per interface IPv4 stats (CONFIG_IP_IFSTATS_TABLE)
Hello, On Tue, 10 Apr 2018, Stephen Suryaputra wrote: > This is enhanced from the proposed patch by Igor Maravic in 2011 to > support per interface IPv4 stats. The enhancement is mainly adding a > kernel configuration option CONFIG_IP_IFSTATS_TABLE. > > Signed-off-by: Stephen Suryaputra> --- ... > diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c > index b54b948..bb9be11 100644 > --- a/net/ipv4/ip_forward.c > +++ b/net/ipv4/ip_forward.c > @@ -66,8 +66,8 @@ static int ip_forward_finish(struct net *net, struct sock > *sk, struct sk_buff *s > { > struct ip_options *opt = &(IPCB(skb)->opt); > > - __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); > - __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len); > + __IP_INC_STATS(net, skb_dst(skb)->dev, IPSTATS_MIB_OUTFORWDATAGRAMS); > + __IP_ADD_STATS(net, skb_dst(skb)->dev, IPSTATS_MIB_OUTOCTETS, skb->len); > > if (unlikely(opt->optlen)) > ip_forward_options(skb); > @@ -121,7 +121,7 @@ int ip_forward(struct sk_buff *skb) > IPCB(skb)->flags |= IPSKB_FORWARDED; > mtu = ip_dst_mtu_maybe_forward(>dst, true); > if (ip_exceeds_mtu(skb, mtu)) { > - IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); > + IP_INC_STATS(net, rt->dst.dev, IPSTATS_MIB_FRAGFAILS); > icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, > htonl(mtu)); > goto drop; > @@ -158,7 +158,7 @@ int ip_forward(struct sk_buff *skb) > > too_many_hops: > /* Tell the sender its packet died... */ > - __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); > + __IP_INC_STATS(net, skb_dst(skb)->dev, IPSTATS_MIB_INHDRERRORS); May be skb->dev if we want to account it to the input device. > icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); > drop: > kfree_skb(skb); ... > diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c > index 4527921..32bd3af 100644 > --- a/net/netfilter/ipvs/ip_vs_xmit.c > +++ b/net/netfilter/ipvs/ip_vs_xmit.c > @@ -286,7 +286,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, > { > if (ip_hdr(skb)->ttl <= 1) { > /* Tell the sender its packet died... */ > - __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); > + __IP_INC_STATS(net, skb_dst(skb)->dev, > IPSTATS_MIB_INHDRERRORS); At this point, skb_dst(skb) can be: - input route at LOCAL_IN => dst->dev is "lo", skb->dev = input_device - output route at LOCAL_OUT => dst->dev is output_device, skb->dev = NULL We should see this error on LOCAL_IN but better to be safe: use 'skb->dev ? : skb_dst(skb)->dev' instead of just 'skb_dst(skb)->dev'. > icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); > return false; > } The patch probably has other errors, for example, using rt->dst.dev (lo) when rt->dst.error != 0 in ip_error, may be 'dev' should be used instead... Regards -- Julian Anastasov
Re: [PATCH net-next] Per interface IPv4 stats (CONFIG_IP_IFSTATS_TABLE)
I read the thread and understand some of the concerns, but these are useful for network equipments. That's why in this patch, I made it as configuration option. If you have feedbacks, I'll appreciate them. I can resend when net-next is opened again. Thanks. On Wed, Apr 11, 2018 at 12:14 PM, Stephen Hemmingerwrote: > On Tue, 10 Apr 2018 22:55:35 -0400 > Stephen Suryaputra wrote: > >> This is enhanced from the proposed patch by Igor Maravic in 2011 to >> support per interface IPv4 stats. The enhancement is mainly adding a >> kernel configuration option CONFIG_IP_IFSTATS_TABLE. >> >> Signed-off-by: Stephen Suryaputra > > > Net-next is closed. http://vger.kernel.org/~davem/net-next.html > > Also, these kind of statistics have a negative performance impact.
Re: [PATCH net-next] Per interface IPv4 stats (CONFIG_IP_IFSTATS_TABLE)
On Tue, 10 Apr 2018 22:55:35 -0400 Stephen Suryaputrawrote: > This is enhanced from the proposed patch by Igor Maravic in 2011 to > support per interface IPv4 stats. The enhancement is mainly adding a > kernel configuration option CONFIG_IP_IFSTATS_TABLE. > > Signed-off-by: Stephen Suryaputra Net-next is closed. http://vger.kernel.org/~davem/net-next.html Also, these kind of statistics have a negative performance impact.
[PATCH net-next] Per interface IPv4 stats (CONFIG_IP_IFSTATS_TABLE)
This is enhanced from the proposed patch by Igor Maravic in 2011 to support per interface IPv4 stats. The enhancement is mainly adding a kernel configuration option CONFIG_IP_IFSTATS_TABLE. Signed-off-by: Stephen Suryaputra--- drivers/net/vrf.c | 2 +- include/linux/inetdevice.h | 22 +++ include/net/icmp.h | 44 -- include/net/ip.h| 72 +-- include/net/ipv6.h | 62 include/net/netns/mib.h | 3 + include/net/snmp.h | 95 ++ net/bridge/br_netfilter_hooks.c | 10 ++-- net/dccp/ipv4.c | 4 +- net/ipv4/Kconfig| 8 +++ net/ipv4/af_inet.c | 7 ++- net/ipv4/datagram.c | 2 +- net/ipv4/devinet.c | 85 ++- net/ipv4/icmp.c | 32 +- net/ipv4/inet_connection_sock.c | 8 ++- net/ipv4/ip_forward.c | 8 +-- net/ipv4/ip_fragment.c | 20 --- net/ipv4/ip_input.c | 29 - net/ipv4/ip_output.c| 40 - net/ipv4/ipmr.c | 6 +- net/ipv4/ping.c | 9 ++- net/ipv4/proc.c | 126 net/ipv4/raw.c | 4 +- net/ipv4/route.c| 6 +- net/ipv4/tcp_ipv4.c | 4 +- net/ipv4/udp.c | 4 +- net/l2tp/l2tp_ip.c | 4 +- net/l2tp/l2tp_ip6.c | 2 +- net/mpls/af_mpls.c | 2 +- net/netfilter/ipvs/ip_vs_xmit.c | 2 +- net/sctp/input.c| 2 +- net/sctp/output.c | 2 +- 32 files changed, 570 insertions(+), 156 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 0a2b180..509c2ca 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -592,7 +592,7 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; - IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); + IP_UPD_PO_STATS(net, dev, IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index e16fe7d..3d120cb 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -22,6 +22,15 @@ struct ipv4_devconf { #define MC_HASH_SZ_LOG 9 +#ifdef CONFIG_IP_IFSTATS_TABLE +struct ipv4_devstat { + struct proc_dir_entry *proc_dir_entry; + DEFINE_SNMP_STAT(struct ipstats_mib, ip); + DEFINE_SNMP_STAT_ATOMIC(struct icmp_mib_device, icmpdev); + DEFINE_SNMP_STAT_ATOMIC(struct icmpmsg_mib_device, icmpmsgdev); +}; +#endif + struct in_device { struct net_device *dev; refcount_t refcnt; @@ -45,6 +54,9 @@ struct in_device { struct neigh_parms *arp_parms; struct ipv4_devconf cnf; +#ifdef CONFIG_IP_IFSTATS_TABLE + struct ipv4_devstat stats; +#endif struct rcu_head rcu_head; }; @@ -216,6 +228,16 @@ static inline struct in_device *__in_dev_get_rcu(const struct net_device *dev) return rcu_dereference(dev->ip_ptr); } +#ifdef CONFIG_IP_IFSTATS_TABLE +static inline struct in_device *__in_dev_get_rcu_safely(const struct net_device *dev) +{ + if (likely(dev)) + return rcu_dereference(dev->ip_ptr); + else + return NULL; +} +#endif + static inline struct in_device *in_dev_get(const struct net_device *dev) { struct in_device *in_dev; diff --git a/include/net/icmp.h b/include/net/icmp.h index 3ef2743..fdfbc0f 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -29,10 +29,44 @@ struct icmp_err { }; extern const struct icmp_err icmp_err_convert[]; -#define ICMP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.icmp_statistics, field) -#define __ICMP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.icmp_statistics, field) -#define ICMPMSGOUT_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256) -#define ICMPMSGIN_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field) +#ifdef CONFIG_IP_IFSTATS_TABLE +#define ICMP_INC_STATS(net, dev, field)\ + ({ \ + rcu_read_lock();\ + _DEVINCATOMIC(net, icmp, struct in_device, \ + __in_dev_get_rcu_safely(dev), field); \ + rcu_read_unlock(); \ + }) + +#define __ICMP_INC_STATS(net, dev, field) \ + ({ \ + rcu_read_lock();\ + ___DEVINCATOMIC(net, icmp, struct in_device,\ + __in_dev_get_rcu_safely(dev), field); \ + rcu_read_unlock(); \ + }) + +#define