[PATCH v6 7/7] net: Suppress the "Comparison to NULL could be written" warnings
This is to suppress the checkpatch.pl warning "Comparison to NULL could be written". No functional changes here. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv4/proc.c | 32 net/sctp/proc.c | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9d7a39a..b39faf6 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -357,22 +357,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_puts(seq, " OutMsgs OutErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } @@ -389,7 +389,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); seq_puts(seq, "Ip: Forwarding DefaultTTL"); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", @@ -400,7 +400,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, net->mib.ip_statistics, offsetof(struct ipstats_mib, syncp)); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %llu", buff64[i]); return 0; @@ -415,13 +415,13 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + for (i = 0; snmp4_tcp_list[i].name; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); snmp_get_cpu_field_batch(buff, snmp4_tcp_list, net->mib.tcp_statistics); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { + for (i = 0; snmp4_tcp_list[i].name; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", buff[i]); @@ -434,10 +434,10 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) snmp_get_cpu_field_batch(buff, snmp4_udp_list, net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %lu", buff[i]); memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); @@ -446,10 +446,10 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); snmp_get_cpu_field_batch(buff, snmp4_udp_list, net->mib.udplite_statistics); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdpLite:"); - for (i = 0; snmp4_u
[PATCH v6 1/7] net:snmp: Introduce generic interfaces for snmp_get_cpu_field{,64}
This is to introduce the generic interfaces for snmp_get_cpu_field{,64}. It exchanges the two for-loops for collecting the percpu statistics data. This can aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> Suggested-by: Marcelo Ricardo Leitner <marcelo.leit...@gmail.com> --- include/net/ip.h | 23 +++ 1 file changed, 23 insertions(+) diff --git a/include/net/ip.h b/include/net/ip.h index 9742b92..bc43c0f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -219,6 +219,29 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } #endif +#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff64[i] += snmp_get_cpu_field64( \ + mib_statistic, \ + c, stats_list[i].entry, \ + offset); \ + } \ +} + +#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff[i] += snmp_get_cpu_field( \ + mib_statistic, \ + c, stats_list[i].entry); \ + } \ +} + void inet_get_local_port_range(struct net *net, int *low, int *high); #ifdef CONFIG_SYSCTL -- 2.5.5
[PATCH v6 5/7] proc: Reduce cache miss in xfrm_statistics_seq_show
This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/xfrm/xfrm_proc.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 9c4fbd8..ba2b539 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -50,12 +50,18 @@ static const struct snmp_mib xfrm_mib_list[] = { static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { + unsigned long buff[LINUX_MIB_XFRMMAX]; struct net *net = seq->private; int i; + + memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX); + + snmp_get_cpu_field_batch(buff, xfrm_mib_list, +net->mib.xfrm_statistics); for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field(net->mib.xfrm_statistics, - xfrm_mib_list[i].entry)); + buff[i]); + return 0; } -- 2.5.5
[PATCH v6 4/7] proc: Reduce cache miss in sctp_snmp_seq_show
This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/sctp/proc.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ef8ba77..09e16c2 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -73,13 +73,17 @@ static const struct snmp_mib sctp_snmp_list[] = { /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { + unsigned long buff[SCTP_MIB_MAX]; struct net *net = seq->private; int i; + memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, sctp_snmp_list, +net->sctp.sctp_statistics); for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field(net->sctp.sctp_statistics, - sctp_snmp_list[i].entry)); + buff[i]); return 0; } -- 2.5.5
[PATCH v6 2/7] proc: Reduce cache miss in snmp_seq_show
This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Then snmp_seq_show is split into 2 parts to avoid build warning "the frame size" larger than 1024. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv4/proc.c | 70 ++--- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9f665b6..9d7a39a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,6 +46,8 @@ #include #include +#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX) + /* * Report socket allocation statistics [m...@utu.fi] */ @@ -378,13 +380,15 @@ static void icmp_put(struct seq_file *seq) /* * Called from the PROCfs module. This outputs /proc/net/snmp. */ -static int snmp_seq_show(struct seq_file *seq, void *v) +static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) { - int i; struct net *net = seq->private; + u64 buff64[IPSTATS_MIB_MAX]; + int i; - seq_puts(seq, "Ip: Forwarding DefaultTTL"); + memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); + seq_puts(seq, "Ip: Forwarding DefaultTTL"); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); @@ -393,57 +397,77 @@ static int snmp_seq_show(struct seq_file *seq, void *v) net->ipv4.sysctl_ip_default_ttl); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); + snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, + net->mib.ip_statistics, + offsetof(struct ipstats_mib, syncp)); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) - seq_printf(seq, " %llu", - snmp_fold_field64(net->mib.ip_statistics, -snmp4_ipstats_list[i].entry, -offsetof(struct ipstats_mib, syncp))); + seq_printf(seq, " %llu", buff64[i]); - icmp_put(seq); /* RFC 2011 compatibility */ - icmpmsg_put(seq); + return 0; +} + +static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) +{ + unsigned long buff[TCPUDP_MIB_MAX]; + struct net *net = seq->private; + int i; + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); + snmp_get_cpu_field_batch(buff, snmp4_tcp_list, +net->mib.tcp_statistics); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) - seq_printf(seq, " %ld", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %ld", buff[i]); else - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); } + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); + + snmp_get_cpu_field_batch(buff, snmp4_udp_list, +net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udp_statistics, - snmp4_udp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); + snmp_get_cpu_field_batch(buff, snmp4_udp_list, +net->mib.udplite_statistics); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu"
[PATCH v6 3/7] proc: Reduce cache miss in snmp6_seq_show
This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv6/proc.c | 30 ++ 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 679253d0..cc8e3ae 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -30,6 +30,11 @@ #include #include +#define MAX4(a, b, c, d) \ + max_t(u32, max_t(u32, a, b), max_t(u32, c, d)) +#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ + IPSTATS_MIB_MAX, ICMP_MIB_MAX) + static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -191,25 +196,34 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist) { + unsigned long buff[SNMP_MIB_MAX]; int i; - unsigned long val; - for (i = 0; itemlist[i].name; i++) { - val = pcpumib ? - snmp_fold_field(pcpumib, itemlist[i].entry) : - atomic_long_read(smib + itemlist[i].entry); - seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val); + if (pcpumib) { + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, itemlist, pcpumib); + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", + itemlist[i].name, buff[i]); + } else { + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, + atomic_long_read(smib + itemlist[i].entry)); } } static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { + u64 buff64[SNMP_MIB_MAX]; int i; + memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff); for (i = 0; itemlist[i].name; i++) - seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, - snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) -- 2.5.5
[PATCH v6 6/7] ipv6: Remove useless parameter in __snmp6_fill_statsdev
The parameter items(is always ICMP6_MIB_MAX) is useless for __snmp6_fill_statsdev Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv6/addrconf.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2f1f5d4..35d4baa 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4961,18 +4961,18 @@ static inline size_t inet6_if_nlmsg_size(void) } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, - int items, int bytes) + int bytes) { int i; - int pad = bytes - sizeof(u64) * items; + int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, [0]); - for (i = 1; i < items; i++) + put_unaligned(ICMP6_MIB_MAX, [0]); + for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read([i]), [i]); - memset([items], 0, pad); + memset([ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, @@ -5005,7 +5005,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); + __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } -- 2.5.5
[PATCH v6 0/7] Reduce cache miss for snmp_fold_field
In a PowerPc server with large cpu number(160), besides commit a3a773726c9f ("net: Optimize snmp stat aggregation by walking all the percpu data at once"), I watched several other snmp_fold_field callsites which would cause high cache miss rate. test source code: My simple test case, which read from the procfs items endlessly: /***/ #include #include #include #include #include #define LINELEN 2560 int main(int argc, char **argv) { int i; int fd = -1 ; int rdsize = 0; char buf[LINELEN+1]; buf[LINELEN] = 0; memset(buf,0,LINELEN); if(1 >= argc) { printf("file name empty\n"); return -1; } fd = open(argv[1], O_RDWR, 0644); if(0 > fd){ printf("open error\n"); return -2; } for(i=0;i<0x;i++) { while(0 < (rdsize = read(fd,buf,LINELEN))){ //nothing here } lseek(fd, 0, SEEK_SET); } close(fd); return 0; } /**/ compile and run: gcc test.c -o test perf stat -d -e cache-misses ./test /proc/net/snmp perf stat -d -e cache-misses ./test /proc/net/snmp6 perf stat -d -e cache-misses ./test /proc/net/sctp/snmp perf stat -d -e cache-misses ./test /proc/net/xfrm_stat before the patch set: Performance counter stats for 'system wide': 355911097 cache-misses [40.08%] 2356829300 L1-dcache-loads [60.04%] 355642645 L1-dcache-load-misses # 15.09% of all L1-dcache hits [60.02%] 346544541 LLC-loads [59.97%] 389763 LLC-load-misses #0.11% of all LL-cache hits[40.02%] 6.245162638 seconds time elapsed After the patch set: === Performance counter stats for 'system wide': 194992476 cache-misses [40.03%] 6718051877 L1-dcache-loads [60.07%] 194871921 L1-dcache-load-misses #2.90% of all L1-dcache hits [60.11%] 187632232 LLC-loads [60.04%] 464466 LLC-load-misses #0.25% of all LL-cache hits[39.89%] 6.868422769 seconds time elapsed The cache-miss rate can be reduced from 15% to 2.9% changelog = v6: - correct v5 v5: - order local variables from longest to shortest line v4: - move memset into one block of if statement in snmp6_seq_show_item - remove the changes in netstat_seq_show considerred the stack usage is too large v3: - introduce generic interface (suggested by Marcelo Ricardo Leitner) - use max_t instead of self defined macro (suggested by David Miller) v2: - fix bug in udplite statistics. - snmp_seq_show is split into 2 parts Jia He (7): net:snmp: Introduce generic interfaces for snmp_get_cpu_field{,64} proc: Reduce cache miss in snmp_seq_show proc: Reduce cache miss in snmp6_seq_show proc: Reduce cache miss in sctp_snmp_seq_show proc: Reduce cache miss in xfrm_statistics_seq_show ipv6: Remove useless parameter in __snmp6_fill_statsdev net: Suppress the "Comparison to NULL could be written" warnings include/net/ip.h | 23 net/ipv4/proc.c | 102 +++ net/ipv6/addrconf.c | 12 +++--- net/ipv6/proc.c | 30 +++ net/sctp/proc.c | 10 +++-- net/xfrm/xfrm_proc.c | 10 - 6 files changed, 129 insertions(+), 58 deletions(-) -- 2.5.5
[PATCH v5 6/7] ipv6: Remove useless parameter in __snmp6_fill_statsdev
The parameter items(always ICMP6_MIB_MAX) is useless for __snmp6_fill_statsdev. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv6/addrconf.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2f1f5d4..35d4baa 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4961,18 +4961,18 @@ static inline size_t inet6_if_nlmsg_size(void) } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, - int items, int bytes) + int bytes) { int i; - int pad = bytes - sizeof(u64) * items; + int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, [0]); - for (i = 1; i < items; i++) + put_unaligned(ICMP6_MIB_MAX, [0]); + for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read([i]), [i]); - memset([items], 0, pad); + memset([ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, @@ -5005,7 +5005,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); + __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } -- 2.5.5
[PATCH v5 5/7] proc: Reduce cache miss in xfrm_statistics_seq_show
This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/xfrm/xfrm_proc.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 9c4fbd8..dc0cea6 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -50,12 +50,18 @@ static const struct snmp_mib xfrm_mib_list[] = { static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { - struct net *net = seq->private; int i; + struct net *net = seq->private; + unsigned long buff[LINUX_MIB_XFRMMAX]; + + memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX); + + snmp_get_cpu_field_batch(buff, xfrm_mib_list, +net->mib.xfrm_statistics); for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field(net->mib.xfrm_statistics, - xfrm_mib_list[i].entry)); + buff[i]); + return 0; } -- 2.5.5
[PATCH v5 1/7] net:snmp: Introduce generic interfaces for snmp_get_cpu_field{,64}
This is to introduce the generic interfaces for snmp_get_cpu_field{,64}. It exchanges the two for-loops for collecting the percpu statistics data. This can aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> Suggested-by: Marcelo Ricardo Leitner <marcelo.leit...@gmail.com> --- include/net/ip.h | 23 +++ 1 file changed, 23 insertions(+) diff --git a/include/net/ip.h b/include/net/ip.h index 9742b92..bc43c0f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -219,6 +219,29 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } #endif +#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff64[i] += snmp_get_cpu_field64( \ + mib_statistic, \ + c, stats_list[i].entry, \ + offset); \ + } \ +} + +#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff[i] += snmp_get_cpu_field( \ + mib_statistic, \ + c, stats_list[i].entry); \ + } \ +} + void inet_get_local_port_range(struct net *net, int *low, int *high); #ifdef CONFIG_SYSCTL -- 2.5.5
[PATCH v5 3/7] proc: Reduce cache miss in snmp6_seq_show
This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv6/proc.c | 32 +++- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 679253d0..e047701 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -30,6 +30,11 @@ #include #include +#define MAX4(a, b, c, d) \ + max_t(u32, max_t(u32, a, b), max_t(u32, c, d)) +#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ + IPSTATS_MIB_MAX, ICMP_MIB_MAX) + static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -192,13 +197,19 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, const struct snmp_mib *itemlist) { int i; - unsigned long val; - - for (i = 0; itemlist[i].name; i++) { - val = pcpumib ? - snmp_fold_field(pcpumib, itemlist[i].entry) : - atomic_long_read(smib + itemlist[i].entry); - seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val); + unsigned long buff[SNMP_MIB_MAX]; + + if (pcpumib) { + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, itemlist, pcpumib); + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", + itemlist[i].name, buff[i]); + } else { + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, + atomic_long_read(smib + itemlist[i].entry)); } } @@ -206,10 +217,13 @@ static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { int i; + u64 buff64[SNMP_MIB_MAX]; + + memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff); for (i = 0; itemlist[i].name; i++) - seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, - snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) -- 2.5.5
[PATCH v5 7/7] net: Suppress the "Comparison to NULL could be written" warnings
This is to suppress the checkpatch.pl warning "Comparison to NULL could be written". No functional changes here. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv4/proc.c | 32 net/sctp/proc.c | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index e01f4df..1a7a773 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -357,22 +357,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_puts(seq, " OutMsgs OutErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } @@ -389,7 +389,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); seq_puts(seq, "Ip: Forwarding DefaultTTL"); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", @@ -400,7 +400,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, net->mib.ip_statistics, offsetof(struct ipstats_mib, syncp)); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %llu", buff64[i]); return 0; @@ -415,13 +415,13 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + for (i = 0; snmp4_tcp_list[i].name; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); snmp_get_cpu_field_batch(buff, snmp4_tcp_list, net->mib.tcp_statistics); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { + for (i = 0; snmp4_tcp_list[i].name; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", buff[i]); @@ -434,10 +434,10 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) snmp_get_cpu_field_batch(buff, snmp4_udp_list, net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %lu", buff[i]); memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); @@ -446,10 +446,10 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); snmp_get_cpu_field_batch(buff, snmp4_udp_list, net->mib.udplite_statistics); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdpLite:"); - for (i = 0; snmp4_u
[PATCH v5 4/7] proc: Reduce cache miss in sctp_snmp_seq_show
This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/sctp/proc.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ef8ba77..9e7f4a7 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -73,13 +73,17 @@ static const struct snmp_mib sctp_snmp_list[] = { /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { - struct net *net = seq->private; int i; + struct net *net = seq->private; + unsigned long buff[SCTP_MIB_MAX]; + + memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX); + snmp_get_cpu_field_batch(buff, sctp_snmp_list, +net->sctp.sctp_statistics); for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field(net->sctp.sctp_statistics, - sctp_snmp_list[i].entry)); + buff[i]); return 0; } -- 2.5.5
[PATCH v5 0/7] Reduce cache miss for snmp_fold_field
In a PowerPc server with large cpu number(160), besides commit a3a773726c9f ("net: Optimize snmp stat aggregation by walking all the percpu data at once"), I watched several other snmp_fold_field callsites which would cause high cache miss rate. test source code: My simple test case, which read from the procfs items endlessly: /***/ #include #include #include #include #include #define LINELEN 2560 int main(int argc, char **argv) { int i; int fd = -1 ; int rdsize = 0; char buf[LINELEN+1]; buf[LINELEN] = 0; memset(buf,0,LINELEN); if(1 >= argc) { printf("file name empty\n"); return -1; } fd = open(argv[1], O_RDWR, 0644); if(0 > fd){ printf("open error\n"); return -2; } for(i=0;i<0x;i++) { while(0 < (rdsize = read(fd,buf,LINELEN))){ //nothing here } lseek(fd, 0, SEEK_SET); } close(fd); return 0; } /**/ compile and run: gcc test.c -o test perf stat -d -e cache-misses ./test /proc/net/snmp perf stat -d -e cache-misses ./test /proc/net/snmp6 perf stat -d -e cache-misses ./test /proc/net/sctp/snmp perf stat -d -e cache-misses ./test /proc/net/xfrm_stat before the patch set: Performance counter stats for 'system wide': 355911097 cache-misses [40.08%] 2356829300 L1-dcache-loads [60.04%] 355642645 L1-dcache-load-misses # 15.09% of all L1-dcache hits [60.02%] 346544541 LLC-loads [59.97%] 389763 LLC-load-misses #0.11% of all LL-cache hits[40.02%] 6.245162638 seconds time elapsed After the patch set: === Performance counter stats for 'system wide': 194992476 cache-misses [40.03%] 6718051877 L1-dcache-loads [60.07%] 194871921 L1-dcache-load-misses #2.90% of all L1-dcache hits [60.11%] 187632232 LLC-loads [60.04%] 464466 LLC-load-misses #0.25% of all LL-cache hits[39.89%] 6.868422769 seconds time elapsed The cache-miss rate can be reduced from 15% to 2.9% changelog = v5: - order local variables from longest to shortest line v4: - move memset into one block of if statement in snmp6_seq_show_item - remove the changes in netstat_seq_show considerred the stack usage is too large v3: - introduce generic interface (suggested by Marcelo Ricardo Leitner) - use max_t instead of self defined macro (suggested by David Miller) v2: - fix bug in udplite statistics. - snmp_seq_show is split into 2 parts Jia He (7): net:snmp: Introduce generic interfaces for snmp_get_cpu_field{,64} proc: Reduce cache miss in snmp_seq_show proc: Reduce cache miss in snmp6_seq_show proc: Reduce cache miss in sctp_snmp_seq_show proc: Reduce cache miss in xfrm_statistics_seq_show ipv6: Remove useless parameter in __snmp6_fill_statsdev net: Suppress the "Comparison to NULL could be written" warnings include/net/ip.h | 23 net/ipv4/proc.c | 100 +++ net/ipv6/addrconf.c | 12 +++ net/ipv6/proc.c | 32 - net/sctp/proc.c | 12 --- net/xfrm/xfrm_proc.c | 12 +-- 6 files changed, 131 insertions(+), 60 deletions(-) -- 2.5.5
[PATCH v5 2/7] proc: Reduce cache miss in snmp_seq_show
This is to use the generic interfaces snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Then snmp_seq_show is split into 2 parts to avoid build warning "the frame size" larger than 1024. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv4/proc.c | 68 ++--- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9f665b6..e01f4df 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,6 +46,8 @@ #include #include +#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX) + /* * Report socket allocation statistics [m...@utu.fi] */ @@ -378,13 +380,15 @@ static void icmp_put(struct seq_file *seq) /* * Called from the PROCfs module. This outputs /proc/net/snmp. */ -static int snmp_seq_show(struct seq_file *seq, void *v) +static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) { int i; + u64 buff64[IPSTATS_MIB_MAX]; struct net *net = seq->private; - seq_puts(seq, "Ip: Forwarding DefaultTTL"); + memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); + seq_puts(seq, "Ip: Forwarding DefaultTTL"); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); @@ -393,57 +397,77 @@ static int snmp_seq_show(struct seq_file *seq, void *v) net->ipv4.sysctl_ip_default_ttl); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); + snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, + net->mib.ip_statistics, + offsetof(struct ipstats_mib, syncp)); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) - seq_printf(seq, " %llu", - snmp_fold_field64(net->mib.ip_statistics, -snmp4_ipstats_list[i].entry, -offsetof(struct ipstats_mib, syncp))); + seq_printf(seq, " %llu", buff64[i]); - icmp_put(seq); /* RFC 2011 compatibility */ - icmpmsg_put(seq); + return 0; +} + +static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) +{ + int i; + struct net *net = seq->private; + unsigned long buff[TCPUDP_MIB_MAX]; + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); + snmp_get_cpu_field_batch(buff, snmp4_tcp_list, +net->mib.tcp_statistics); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) - seq_printf(seq, " %ld", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %ld", buff[i]); else - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); } + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); + + snmp_get_cpu_field_batch(buff, snmp4_udp_list, +net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udp_statistics, - snmp4_udp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); + snmp_get_cpu_field_batch(buff, snmp4_udp_list, +net->mib.udplite_statistics); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", -
[PATCH v4 0/7] Reduce cache miss for snmp_fold_field
In a PowerPc server with large cpu number(160), besides commit a3a773726c9f ("net: Optimize snmp stat aggregation by walking all the percpu data at once"), I watched several other snmp_fold_field callsites which would cause high cache miss rate. Testcase: = My simple test case, which read from the procfs items endlessly: /***/ #include #include #include #include #include #define LINELEN 2560 int main(int argc, char **argv) { int i; int fd = -1 ; int rdsize = 0; char buf[LINELEN+1]; buf[LINELEN] = 0; memset(buf,0,LINELEN); if(1 >= argc) { printf("file name empty\n"); return -1; } fd = open(argv[1], O_RDWR, 0644); if(0 > fd){ printf("open error\n"); return -2; } for(i=0;i<0x;i++) { while(0 < (rdsize = read(fd,buf,LINELEN))){ //nothing here } lseek(fd, 0, SEEK_SET); } close(fd); return 0; } /**/ Compile and run: gcc test.c -o test perf stat -d -e cache-misses ./test /proc/net/snmp perf stat -d -e cache-misses ./test /proc/net/snmp6 perf stat -d -e cache-misses ./test /proc/net/sctp/snmp perf stat -d -e cache-misses ./test /proc/net/xfrm_stat before the patch set: Performance counter stats for 'system wide': 355911097 cache-misses [40.08%] 2356829300 L1-dcache-loads [60.04%] 355642645 L1-dcache-load-misses # 15.09% of all L1-dcache hits [60.02%] 346544541 LLC-loads [59.97%] 389763 LLC-load-misses #0.11% of all LL-cache hits[40.02%] 6.245162638 seconds time elapsed After the patch set: === Performance counter stats for 'system wide': 194992476 cache-misses [40.03%] 6718051877 L1-dcache-loads [60.07%] 194871921 L1-dcache-load-misses #2.90% of all L1-dcache hits [60.11%] 187632232 LLC-loads [60.04%] 464466 LLC-load-misses #0.25% of all LL-cache hits[39.89%] 6.868422769 seconds time elapsed The cache-miss rate can be reduced from 15% to 2.9% v4: - move memset into one block of if statement in snmp6_seq_show_item - remove the changes in netstat_seq_show considerred the stack usage is too large v3: - introduce generic interface (suggested by Marcelo Ricardo Leitner) - use max_t instead of self defined macro (suggested by David Miller) v2: - fix bug in udplite statistics. - snmp_seq_show is split into 2 parts Jia He (7): net:snmp: Introduce generic interfaces for snmp_get_cpu_field{,64} proc: Reduce cache miss in snmp_seq_show proc: Reduce cache miss in snmp6_seq_show proc: Reduce cache miss in sctp_snmp_seq_show proc: Reduce cache miss in xfrm_statistics_seq_show ipv6: Remove useless parameter in __snmp6_fill_statsdev net: Suppress the "Comparison to NULL could be written" warnings include/net/ip.h | 23 net/ipv4/proc.c | 100 +++ net/ipv6/addrconf.c | 12 +++ net/ipv6/proc.c | 32 - net/sctp/proc.c | 10 -- net/xfrm/xfrm_proc.c | 10 -- 6 files changed, 129 insertions(+), 58 deletions(-) -- 2.5.5
[PATCH v4 6/7] ipv6: Remove useless parameter in __snmp6_fill_statsdev
The parameter items(always ICMP6_MIB_MAX) is useless for __snmp6_fill_statsdev. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv6/addrconf.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2f1f5d4..35d4baa 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4961,18 +4961,18 @@ static inline size_t inet6_if_nlmsg_size(void) } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, - int items, int bytes) + int bytes) { int i; - int pad = bytes - sizeof(u64) * items; + int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, [0]); - for (i = 1; i < items; i++) + put_unaligned(ICMP6_MIB_MAX, [0]); + for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read([i]), [i]); - memset([items], 0, pad); + memset([ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, @@ -5005,7 +5005,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); + __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } -- 2.5.5
[PATCH v4 5/7] proc: Reduce cache miss in xfrm_statistics_seq_show
This is to use the generic interface snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/xfrm/xfrm_proc.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 9c4fbd8..2b280fc 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -52,10 +52,16 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; int i; + unsigned long buff[LINUX_MIB_XFRMMAX]; + + memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX); + + snmp_get_cpu_field_batch(buff, xfrm_mib_list, +net->mib.xfrm_statistics); for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field(net->mib.xfrm_statistics, - xfrm_mib_list[i].entry)); + buff[i]); + return 0; } -- 2.5.5
[PATCH v4 4/7] proc: Reduce cache miss in sctp_snmp_seq_show
This is to use the generic interface snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/sctp/proc.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ef8ba77..0487c01 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -75,11 +75,15 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; int i; + unsigned long buff[SCTP_MIB_MAX]; + memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, sctp_snmp_list, +net->sctp.sctp_statistics); for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field(net->sctp.sctp_statistics, - sctp_snmp_list[i].entry)); + buff[i]); return 0; } -- 2.5.5
[PATCH v4 3/7] proc: Reduce cache miss in snmp6_seq_show
This is to use the generic interface snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv6/proc.c | 32 +++- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 679253d0..e047701 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -30,6 +30,11 @@ #include #include +#define MAX4(a, b, c, d) \ + max_t(u32, max_t(u32, a, b), max_t(u32, c, d)) +#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ + IPSTATS_MIB_MAX, ICMP_MIB_MAX) + static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -192,13 +197,19 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, const struct snmp_mib *itemlist) { int i; - unsigned long val; - - for (i = 0; itemlist[i].name; i++) { - val = pcpumib ? - snmp_fold_field(pcpumib, itemlist[i].entry) : - atomic_long_read(smib + itemlist[i].entry); - seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val); + unsigned long buff[SNMP_MIB_MAX]; + + if (pcpumib) { + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, itemlist, pcpumib); + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", + itemlist[i].name, buff[i]); + } else { + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, + atomic_long_read(smib + itemlist[i].entry)); } } @@ -206,10 +217,13 @@ static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { int i; + u64 buff64[SNMP_MIB_MAX]; + + memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff); for (i = 0; itemlist[i].name; i++) - seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, - snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) -- 2.5.5
[PATCH v4 7/7] net: Suppress the "Comparison to NULL could be written" warnings
This is to suppress the checkpatch.pl warning "Comparison to NULL could be written". No functional changes here. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv4/proc.c | 32 net/sctp/proc.c | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index cb04589..1ccd1b2 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -357,22 +357,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_puts(seq, " OutMsgs OutErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } @@ -389,7 +389,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); seq_puts(seq, "Ip: Forwarding DefaultTTL"); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", @@ -400,7 +400,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, net->mib.ip_statistics, offsetof(struct ipstats_mib, syncp)); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %llu", buff64[i]); return 0; @@ -415,13 +415,13 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + for (i = 0; snmp4_tcp_list[i].name; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); snmp_get_cpu_field_batch(buff, snmp4_tcp_list, net->mib.tcp_statistics); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { + for (i = 0; snmp4_tcp_list[i].name; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", buff[i]); @@ -434,10 +434,10 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) snmp_get_cpu_field_batch(buff, snmp4_udp_list, net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %lu", buff[i]); memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); @@ -446,10 +446,10 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); snmp_get_cpu_field_batch(buff, snmp4_udp_list, net->mib.udplite_statistics); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdpLite:"); - for (i = 0; snmp4_u
[PATCH v4 1/7] net:snmp: Introduce generic interfaces for snmp_get_cpu_field{,64}
This is to introduce the generic interfaces for snmp_get_cpu_field{,64}. It exchanges the two for-loops for collecting the percpu statistics data. This can aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> Suggested-by: Marcelo Ricardo Leitner <marcelo.leit...@gmail.com> --- include/net/ip.h | 23 +++ 1 file changed, 23 insertions(+) diff --git a/include/net/ip.h b/include/net/ip.h index 9742b92..bc43c0f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -219,6 +219,29 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } #endif +#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff64[i] += snmp_get_cpu_field64( \ + mib_statistic, \ + c, stats_list[i].entry, \ + offset); \ + } \ +} + +#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff[i] += snmp_get_cpu_field( \ + mib_statistic, \ + c, stats_list[i].entry); \ + } \ +} + void inet_get_local_port_range(struct net *net, int *low, int *high); #ifdef CONFIG_SYSCTL -- 2.5.5
[PATCH v4 2/7] proc: Reduce cache miss in snmp_seq_show
This is to use the generic interface snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Then snmp_seq_show is split into 2 parts to avoid build warning "the frame size" larger than 1024. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv4/proc.c | 68 ++--- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9f665b6..cb04589 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,6 +46,8 @@ #include #include +#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX) + /* * Report socket allocation statistics [m...@utu.fi] */ @@ -378,13 +380,15 @@ static void icmp_put(struct seq_file *seq) /* * Called from the PROCfs module. This outputs /proc/net/snmp. */ -static int snmp_seq_show(struct seq_file *seq, void *v) +static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) { int i; + u64 buff64[IPSTATS_MIB_MAX]; struct net *net = seq->private; - seq_puts(seq, "Ip: Forwarding DefaultTTL"); + memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); + seq_puts(seq, "Ip: Forwarding DefaultTTL"); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); @@ -393,57 +397,77 @@ static int snmp_seq_show(struct seq_file *seq, void *v) net->ipv4.sysctl_ip_default_ttl); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); + snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, + net->mib.ip_statistics, + offsetof(struct ipstats_mib, syncp)); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) - seq_printf(seq, " %llu", - snmp_fold_field64(net->mib.ip_statistics, -snmp4_ipstats_list[i].entry, -offsetof(struct ipstats_mib, syncp))); + seq_printf(seq, " %llu", buff64[i]); - icmp_put(seq); /* RFC 2011 compatibility */ - icmpmsg_put(seq); + return 0; +} + +static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) +{ + int i; + unsigned long buff[TCPUDP_MIB_MAX]; + struct net *net = seq->private; + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); + snmp_get_cpu_field_batch(buff, snmp4_tcp_list, +net->mib.tcp_statistics); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) - seq_printf(seq, " %ld", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %ld", buff[i]); else - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); } + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); + + snmp_get_cpu_field_batch(buff, snmp4_udp_list, +net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udp_statistics, - snmp4_udp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); + snmp_get_cpu_field_batch(buff, snmp4_udp_list, +net->mib.udplite_statistics); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", -
[RFC PATCH v3 0/7] Reduce cache miss for snmp_fold_field
In a PowerPc server with large cpu number(160), besides commit a3a773726c9f ("net: Optimize snmp stat aggregation by walking all the percpu data at once"), I watched several other snmp_fold_field callsites which will cause high cache miss rate. My simple test case, which read from the procfs items endlessly: /***/ #include #include #include #include #include #define LINELEN 2560 int main(int argc, char **argv) { int i; int fd = -1 ; int rdsize = 0; char buf[LINELEN+1]; buf[LINELEN] = 0; memset(buf,0,LINELEN); if(1 >= argc) { printf("file name empty\n"); return -1; } fd = open(argv[1], O_RDWR, 0644); if(0 > fd){ printf("open error\n"); return -2; } for(i=0;i<0x;i++) { while(0 < (rdsize = read(fd,buf,LINELEN))){ //nothing here } lseek(fd, 0, SEEK_SET); } close(fd); return 0; } /**/ compile and run: gcc test.c -o test perf stat -d -e cache-misses ./test /proc/net/snmp perf stat -d -e cache-misses ./test /proc/net/snmp6 perf stat -d -e cache-misses ./test /proc/net/netstat perf stat -d -e cache-misses ./test /proc/net/sctp/snmp perf stat -d -e cache-misses ./test /proc/net/xfrm_stat before the patch set: Performance counter stats for 'system wide': 355911097 cache-misses [40.08%] 2356829300 L1-dcache-loads [60.04%] 355642645 L1-dcache-load-misses # 15.09% of all L1-dcache hits [60.02%] 346544541 LLC-loads [59.97%] 389763 LLC-load-misses #0.11% of all LL-cache hits[40.02%] 6.245162638 seconds time elapsed After the patch set: === Performance counter stats for 'system wide': 194992476 cache-misses [40.03%] 6718051877 L1-dcache-loads [60.07%] 194871921 L1-dcache-load-misses #2.90% of all L1-dcache hits [60.11%] 187632232 LLC-loads [60.04%] 464466 LLC-load-misses #0.25% of all LL-cache hits[39.89%] 6.868422769 seconds time elapsed The cache-miss rate can be reduced from 15% to 2.9% v3: - introduce generic interface (suggested by Marcelo Ricardo Leitner) - use max_t instead of self defined macro (suggested by David Miller) v2: - fix bug in udplite statistics. - snmp_seq_show is split into 2 parts Jia He (7): net:snmp: Introduce generic interfaces for snmp_get_cpu_field{,64} proc: Reduce cache miss in {snmp,netstat}_seq_show proc: Reduce cache miss in snmp6_seq_show proc: Reduce cache miss in sctp_snmp_seq_show proc: Reduce cache miss in xfrm_statistics_seq_show ipv6: Remove useless parameter in __snmp6_fill_statsdev net: Suppress the "Comparison to NULL could be written" warning include/net/ip.h | 23 + net/ipv4/proc.c | 138 +-- net/ipv6/addrconf.c | 12 ++--- net/ipv6/proc.c | 32 net/sctp/proc.c | 10 ++-- net/xfrm/xfrm_proc.c | 10 +++- 6 files changed, 157 insertions(+), 68 deletions(-) -- 1.8.3.1
[RFC PATCH v3 7/7] net: Suppress the "Comparison to NULL could be written" warning
This is to suppress the checkpatch.pl warning "Comparison to NULL could be written". No functional changes here. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv4/proc.c | 32 net/sctp/proc.c | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index c6fc80e..c6ee8a2 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -357,22 +357,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_puts(seq, " OutMsgs OutErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } @@ -389,7 +389,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); seq_puts(seq, "Ip: Forwarding DefaultTTL"); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", @@ -400,7 +400,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, net->mib.ip_statistics, offsetof(struct ipstats_mib, syncp)); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %llu", buff64[i]); return 0; @@ -415,13 +415,13 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + for (i = 0; snmp4_tcp_list[i].name; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); snmp_get_cpu_field_batch(buff, snmp4_tcp_list, net->mib.tcp_statistics); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { + for (i = 0; snmp4_tcp_list[i].name; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", buff[i]); @@ -434,10 +434,10 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) snmp_get_cpu_field_batch(buff, snmp4_udp_list, net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %lu", buff[i]); memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); @@ -446,10 +446,10 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) seq_puts(seq, "\nUdpLite:"); snmp_get_cpu_field_batch(buff, snmp4_udp_list, net->mib.udplite_statistics); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdpLite:"); - for (i = 0; snmp4_udp_list[i].nam
[RFC PATCH v3 3/7] proc: Reduce cache miss in snmp6_seq_show
This is to use the generic interface snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv6/proc.c | 32 +++- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 679253d0..50ba2c3 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -30,6 +30,11 @@ #include #include +#define MAX4(a, b, c, d) \ + max_t(u32, max_t(u32, a, b), max_t(u32, c, d)) +#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ + IPSTATS_MIB_MAX, ICMP_MIB_MAX) + static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -192,13 +197,19 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, const struct snmp_mib *itemlist) { int i; - unsigned long val; - - for (i = 0; itemlist[i].name; i++) { - val = pcpumib ? - snmp_fold_field(pcpumib, itemlist[i].entry) : - atomic_long_read(smib + itemlist[i].entry); - seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val); + unsigned long buff[SNMP_MIB_MAX]; + + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + if (pcpumib) { + snmp_get_cpu_field_batch(buff, itemlist, pcpumib); + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", + itemlist[i].name, buff[i]); + } else { + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, + atomic_long_read(smib + itemlist[i].entry)); } } @@ -206,10 +217,13 @@ static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { int i; + u64 buff64[SNMP_MIB_MAX]; + + memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff); for (i = 0; itemlist[i].name; i++) - seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, - snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) -- 1.8.3.1
[RFC PATCH v3 6/7] ipv6: Remove useless parameter in __snmp6_fill_statsdev
The parameter items(always ICMP6_MIB_MAX) is useless for __snmp6_fill_statsdev. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv6/addrconf.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f418d2e..e170554 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4952,18 +4952,18 @@ static inline size_t inet6_if_nlmsg_size(void) } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, - int items, int bytes) + int bytes) { int i; - int pad = bytes - sizeof(u64) * items; + int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, [0]); - for (i = 1; i < items; i++) + put_unaligned(ICMP6_MIB_MAX, [0]); + for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read([i]), [i]); - memset([items], 0, pad); + memset([ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, @@ -4996,7 +4996,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); + __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } -- 1.8.3.1
[RFC PATCH v3 2/7] proc: Reduce cache miss in {snmp,netstat}_seq_show
This is to use the generic interface snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Then snmp_seq_show and netstat_seq_show are split into 2 parts to avoid build warning "the frame size" larger than 1024 on s390. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv4/proc.c | 106 +++- 1 file changed, 74 insertions(+), 32 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9f665b6..c6fc80e 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,6 +46,8 @@ #include #include +#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX) + /* * Report socket allocation statistics [m...@utu.fi] */ @@ -378,13 +380,15 @@ static void icmp_put(struct seq_file *seq) /* * Called from the PROCfs module. This outputs /proc/net/snmp. */ -static int snmp_seq_show(struct seq_file *seq, void *v) +static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) { int i; + u64 buff64[IPSTATS_MIB_MAX]; struct net *net = seq->private; - seq_puts(seq, "Ip: Forwarding DefaultTTL"); + memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); + seq_puts(seq, "Ip: Forwarding DefaultTTL"); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); @@ -393,57 +397,77 @@ static int snmp_seq_show(struct seq_file *seq, void *v) net->ipv4.sysctl_ip_default_ttl); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); + snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, + net->mib.ip_statistics, + offsetof(struct ipstats_mib, syncp)); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) - seq_printf(seq, " %llu", - snmp_fold_field64(net->mib.ip_statistics, -snmp4_ipstats_list[i].entry, -offsetof(struct ipstats_mib, syncp))); + seq_printf(seq, " %llu", buff64[i]); - icmp_put(seq); /* RFC 2011 compatibility */ - icmpmsg_put(seq); + return 0; +} + +static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) +{ + int i; + unsigned long buff[TCPUDP_MIB_MAX]; + struct net *net = seq->private; + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); + snmp_get_cpu_field_batch(buff, snmp4_tcp_list, +net->mib.tcp_statistics); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) - seq_printf(seq, " %ld", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %ld", buff[i]); else - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); } + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); + + snmp_get_cpu_field_batch(buff, snmp4_udp_list, +net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udp_statistics, - snmp4_udp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); + snmp_get_cpu_field_batch(buff, snmp4_udp_list, +net->mib.udplite_statistics); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu&q
[RFC PATCH v3 5/7] proc: Reduce cache miss in xfrm_statistics_seq_show
This is to use the generic interface snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/xfrm/xfrm_proc.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 9c4fbd8..2b280fc 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -52,10 +52,16 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; int i; + unsigned long buff[LINUX_MIB_XFRMMAX]; + + memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX); + + snmp_get_cpu_field_batch(buff, xfrm_mib_list, +net->mib.xfrm_statistics); for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field(net->mib.xfrm_statistics, - xfrm_mib_list[i].entry)); + buff[i]); + return 0; } -- 1.8.3.1
[RFC PATCH v3 4/7] proc: Reduce cache miss in sctp_snmp_seq_show
This is to use the generic interface snmp_get_cpu_field{,64}_batch to aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/sctp/proc.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ef8ba77..0487c01 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -75,11 +75,15 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; int i; + unsigned long buff[SCTP_MIB_MAX]; + memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, sctp_snmp_list, +net->sctp.sctp_statistics); for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field(net->sctp.sctp_statistics, - sctp_snmp_list[i].entry)); + buff[i]); return 0; } -- 1.8.3.1
[RFC PATCH v3 1/7] net:snmp: Introduce generic batch interfaces for snmp_get_cpu_field{,64}
This is to introduced the generic batch interfaces for snmp_get_cpu_field{,64}. It exchanges the two for-loops for collecting the percpu statistics data. This can aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> Suggested-by: Marcelo Ricardo Leitner <marcelo.leit...@gmail.com> --- include/net/ip.h | 23 +++ 1 file changed, 23 insertions(+) diff --git a/include/net/ip.h b/include/net/ip.h index 9742b92..bc43c0f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -219,6 +219,29 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } #endif +#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff64[i] += snmp_get_cpu_field64( \ + mib_statistic, \ + c, stats_list[i].entry, \ + offset); \ + } \ +} + +#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff[i] += snmp_get_cpu_field( \ + mib_statistic, \ + c, stats_list[i].entry); \ + } \ +} + void inet_get_local_port_range(struct net *net, int *low, int *high); #ifdef CONFIG_SYSCTL -- 1.8.3.1
[RFC PATCH v2 6/6] net: Suppress the "Comparison to NULL could be written" warning
This is to suppress the checkpatch.pl warning "Comparison to NULL could be written". No functional changes here. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv4/proc.c | 44 ++-- net/sctp/proc.c | 4 ++-- net/xfrm/xfrm_proc.c | 4 ++-- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f413fdc..bf0bb22 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -358,22 +358,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_puts(seq, " OutMsgs OutErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } @@ -390,7 +390,7 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); seq_puts(seq, "Ip: Forwarding DefaultTTL"); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", @@ -400,13 +400,13 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); for_each_possible_cpu(c) { - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) buff64[i] += snmp_get_cpu_field64( net->mib.ip_statistics, c, snmp4_ipstats_list[i].entry, offsetof(struct ipstats_mib, syncp)); } - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %llu", buff64[i]); return 0; @@ -421,17 +421,17 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + for (i = 0; snmp4_tcp_list[i].name; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); for_each_possible_cpu(c) { - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + for (i = 0; snmp4_tcp_list[i].name; i++) buff[i] += snmp_get_cpu_field(net->mib.tcp_statistics, c, snmp4_tcp_list[i].entry); } - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { + for (i = 0; snmp4_tcp_list[i].name; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", buff[i]); @@ -442,15 +442,15 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); for_each_possible_cpu(c) { - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) buff[i] += snmp_get_cpu_field(net->mib.udp_statistics, c, snmp4_udp_list[i].entry); } seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_li
[RFC PATCH v2 3/6] proc: Reduce cache miss in sctp_snmp_seq_show
This patch exchanges the two loop for collecting the percpu statistics data. This can reduce cache misses by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/sctp/proc.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ef8ba77..085fb95 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -74,12 +74,19 @@ static const struct snmp_mib sctp_snmp_list[] = { static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; - int i; + int i, c; + unsigned long buff[SCTP_MIB_MAX]; + memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX); + + for_each_possible_cpu(c) + for (i = 0; sctp_snmp_list[i].name != NULL; i++) + buff[i] += snmp_get_cpu_field( + net->sctp.sctp_statistics, + c, sctp_snmp_list[i].entry); for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field(net->sctp.sctp_statistics, - sctp_snmp_list[i].entry)); + buff[i]); return 0; } -- 1.8.3.1
[RFC PATCH v2 1/6] proc: Reduce cache miss in {snmp,netstat}_seq_show
This patch exchanges the two loop for collecting the percpu statistics data. This can aggregate the data by going through all the items of each cpu sequentially. Then snmp_seq_show is split into 2 parts to avoid build warning "the frame size" larger than 1024. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv4/proc.c | 112 1 file changed, 81 insertions(+), 31 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9f665b6..f413fdc 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,6 +46,9 @@ #include #include +#define MAX(a, b) ((u32)(a) >= (u32)(b) ? (a) : (b)) +#define TCPUDP_MIB_MAX MAX(UDP_MIB_MAX, TCP_MIB_MAX) + /* * Report socket allocation statistics [m...@utu.fi] */ @@ -378,13 +381,15 @@ static void icmp_put(struct seq_file *seq) /* * Called from the PROCfs module. This outputs /proc/net/snmp. */ -static int snmp_seq_show(struct seq_file *seq, void *v) +static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) { - int i; + int i, c; + u64 buff64[IPSTATS_MIB_MAX]; struct net *net = seq->private; - seq_puts(seq, "Ip: Forwarding DefaultTTL"); + memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); + seq_puts(seq, "Ip: Forwarding DefaultTTL"); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); @@ -393,57 +398,92 @@ static int snmp_seq_show(struct seq_file *seq, void *v) net->ipv4.sysctl_ip_default_ttl); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); + + for_each_possible_cpu(c) { + for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + buff64[i] += snmp_get_cpu_field64( + net->mib.ip_statistics, + c, snmp4_ipstats_list[i].entry, + offsetof(struct ipstats_mib, syncp)); + } for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) - seq_printf(seq, " %llu", - snmp_fold_field64(net->mib.ip_statistics, -snmp4_ipstats_list[i].entry, -offsetof(struct ipstats_mib, syncp))); + seq_printf(seq, " %llu", buff64[i]); - icmp_put(seq); /* RFC 2011 compatibility */ - icmpmsg_put(seq); + return 0; +} + +static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) +{ + int i, c; + unsigned long buff[TCPUDP_MIB_MAX]; + struct net *net = seq->private; + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); for (i = 0; snmp4_tcp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); + for_each_possible_cpu(c) { + for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + buff[i] += snmp_get_cpu_field(net->mib.tcp_statistics, + c, snmp4_tcp_list[i].entry); + } + for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) - seq_printf(seq, " %ld", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %ld", buff[i]); else - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); } + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); + + for_each_possible_cpu(c) { + for (i = 0; snmp4_udp_list[i].name != NULL; i++) + buff[i] += snmp_get_cpu_field(net->mib.udp_statistics, + c, snmp4_udp_list[i].entry); + } seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udp_statistics, - snmp4_udp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); + + memset(
[RFC PATCH v2 5/6] ipv6: Remove useless parameter in __snmp6_fill_statsdev
The parameter items(always ICMP6_MIB_MAX) is useless for __snmp6_fill_statsdev. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv6/addrconf.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f418d2e..e170554 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4952,18 +4952,18 @@ static inline size_t inet6_if_nlmsg_size(void) } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, - int items, int bytes) + int bytes) { int i; - int pad = bytes - sizeof(u64) * items; + int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, [0]); - for (i = 1; i < items; i++) + put_unaligned(ICMP6_MIB_MAX, [0]); + for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read([i]), [i]); - memset([items], 0, pad); + memset([ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, @@ -4996,7 +4996,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); + __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } -- 1.8.3.1
[RFC PATCH v2 4/6] proc: Reduce cache miss in xfrm_statistics_seq_show
This patch exchanges the two loop for collecting the percpu statistics data. This can reduce cache misses by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/xfrm/xfrm_proc.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 9c4fbd8..c9df546 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -51,11 +51,20 @@ static const struct snmp_mib xfrm_mib_list[] = { static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; - int i; - for (i = 0; xfrm_mib_list[i].name; i++) + int i, c; + unsigned long buff[LINUX_MIB_XFRMMAX]; + + memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX); + + for_each_possible_cpu(c) + for (i = 0; xfrm_mib_list[i].name != NULL; i++) + buff[i] += snmp_get_cpu_field( + net->mib.xfrm_statistics, + c, xfrm_mib_list[i].entry); + for (i = 0; xfrm_mib_list[i].name != NULL; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field(net->mib.xfrm_statistics, - xfrm_mib_list[i].entry)); + buff[i]); + return 0; } -- 1.8.3.1
[RFC PATCH v2 2/6] proc: Reduce cache miss in snmp6_seq_show
This patch exchanges the two loop for collecting the percpu statistics data. This can reduce cache misses by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv6/proc.c | 47 --- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 679253d0..c834646 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -30,6 +30,13 @@ #include #include +#define MAX(a, b) ((u32)(a) >= (u32)(b) ? (a) : (b)) + +#define MAX4(a, b, c, d) \ + MAX(MAX(a, b), MAX(c, d)) +#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ + IPSTATS_MIB_MAX, ICMP_MIB_MAX) + static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -191,25 +198,43 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist) { - int i; - unsigned long val; - - for (i = 0; itemlist[i].name; i++) { - val = pcpumib ? - snmp_fold_field(pcpumib, itemlist[i].entry) : - atomic_long_read(smib + itemlist[i].entry); - seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val); + int i, c; + unsigned long buff[SNMP_MIB_MAX]; + + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + if (pcpumib) { + for_each_possible_cpu(c) + for (i = 0; itemlist[i].name; i++) + buff[i] += snmp_get_cpu_field(pcpumib, c, + itemlist[i].entry); + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", + itemlist[i].name, buff[i]); + } else { + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, + atomic_long_read(smib + itemlist[i].entry)); } } static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { - int i; + int i, c; + u64 buff[SNMP_MIB_MAX]; + + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + for_each_possible_cpu(c) { + for (i = 0; itemlist[i].name; i++) { + buff[i] += snmp_get_cpu_field64(mib, c, + itemlist[i].entry, + syncpoff); + } + } for (i = 0; itemlist[i].name; i++) - seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, - snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) -- 1.8.3.1
[RFC PATCH v2 0/6] Reduce cache miss for snmp_fold_field
In a PowerPc server with large cpu number(160), besides commit a3a773726c9f ("net: Optimize snmp stat aggregation by walking all the percpu data at once"), I watched several other snmp_fold_field callsites which will cause high cache miss rate. My simple test case, which read from the procfs items endlessly: /***/ #include #include #include #include #include #define LINELEN 2560 int main(int argc, char **argv) { int i; int fd = -1 ; int rdsize = 0; char buf[LINELEN+1]; buf[LINELEN] = 0; memset(buf,0,LINELEN); if(1 >= argc) { printf("file name empty\n"); return -1; } fd = open(argv[1], O_RDWR, 0644); if(0 > fd){ printf("open error\n"); return -2; } for(i=0;i<0x;i++) { while(0 < (rdsize = read(fd,buf,LINELEN))){ //nothing here } lseek(fd, 0, SEEK_SET); } close(fd); return 0; } /**/ compile and run: gcc test.c -o test perf stat -d -e cache-misses ./test /proc/net/snmp perf stat -d -e cache-misses ./test /proc/net/snmp6 perf stat -d -e cache-misses ./test /proc/net/netstat perf stat -d -e cache-misses ./test /proc/net/sctp/snmp perf stat -d -e cache-misses ./test /proc/net/xfrm_stat before the patch set: Performance counter stats for 'system wide': 355911097 cache-misses [40.08%] 2356829300 L1-dcache-loads [60.04%] 355642645 L1-dcache-load-misses # 15.09% of all L1-dcache hits [60.02%] 346544541 LLC-loads [59.97%] 389763 LLC-load-misses #0.11% of all LL-cache hits[40.02%] 6.245162638 seconds time elapsed After the patch set: === Performance counter stats for 'system wide': 194992476 cache-misses [40.03%] 6718051877 L1-dcache-loads [60.07%] 194871921 L1-dcache-load-misses #2.90% of all L1-dcache hits [60.11%] 187632232 LLC-loads [60.04%] 464466 LLC-load-misses #0.25% of all LL-cache hits[39.89%] 6.868422769 seconds time elapsed The cache-miss rate can be reduced from 15% to 2.9% v2: - 1/6 fix bug in udplite statistics. - 1/6 snmp_seq_show is split into 2 parts Jia He (6): proc: Reduce cache miss in {snmp,netstat}_seq_show proc: Reduce cache miss in snmp6_seq_show proc: Reduce cache miss in sctp_snmp_seq_show proc: Reduce cache miss in xfrm_statistics_seq_show ipv6: Remove useless parameter in __snmp6_fill_statsdev net: Suppress the "Comparison to NULL could be written" warning net/ipv4/proc.c | 144 ++- net/ipv6/addrconf.c | 12 ++--- net/ipv6/proc.c | 47 + net/sctp/proc.c | 15 -- net/xfrm/xfrm_proc.c | 15 -- 5 files changed, 162 insertions(+), 71 deletions(-) -- 1.8.3.1
[RFC PATCH 5/6] ipv6: Remove useless parameter in __snmp6_fill_statsdev
The parameter items(always ICMP6_MIB_MAX) is useless for __snmp6_fill_statsdev. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv6/addrconf.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index df8425f..0fa5aa1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4944,18 +4944,18 @@ static inline size_t inet6_if_nlmsg_size(void) } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, - int items, int bytes) + int bytes) { int i; - int pad = bytes - sizeof(u64) * items; + int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, [0]); - for (i = 1; i < items; i++) + put_unaligned(ICMP6_MIB_MAX, [0]); + for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read([i]), [i]); - memset([items], 0, pad); + memset([ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, @@ -4988,7 +4988,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); + __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } -- 1.8.3.1
[RFC PATCH 2/6] proc: Reduce cache miss in snmp6_seq_show
This patch exchanges the two loop for collecting the percpu statistics data. This can reduce cache misses by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv6/proc.c | 47 --- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 679253d0..c834646 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -30,6 +30,13 @@ #include #include +#define MAX(a, b) ((u32)(a) >= (u32)(b) ? (a) : (b)) + +#define MAX4(a, b, c, d) \ + MAX(MAX(a, b), MAX(c, d)) +#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ + IPSTATS_MIB_MAX, ICMP_MIB_MAX) + static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -191,25 +198,43 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist) { - int i; - unsigned long val; - - for (i = 0; itemlist[i].name; i++) { - val = pcpumib ? - snmp_fold_field(pcpumib, itemlist[i].entry) : - atomic_long_read(smib + itemlist[i].entry); - seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val); + int i, c; + unsigned long buff[SNMP_MIB_MAX]; + + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + if (pcpumib) { + for_each_possible_cpu(c) + for (i = 0; itemlist[i].name; i++) + buff[i] += snmp_get_cpu_field(pcpumib, c, + itemlist[i].entry); + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", + itemlist[i].name, buff[i]); + } else { + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, + atomic_long_read(smib + itemlist[i].entry)); } } static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { - int i; + int i, c; + u64 buff[SNMP_MIB_MAX]; + + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + for_each_possible_cpu(c) { + for (i = 0; itemlist[i].name; i++) { + buff[i] += snmp_get_cpu_field64(mib, c, + itemlist[i].entry, + syncpoff); + } + } for (i = 0; itemlist[i].name; i++) - seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, - snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) -- 1.8.3.1
[RFC PATCH 1/6] proc: Reduce cache miss in {snmp,netstat}_seq_show
This patch exchanges the two loop for collecting the percpu statistics data. This can aggregate the data by going through all the items of each cpu sequentially. In snmp_seq_show, just use one buff copy to dislay the Udp and UdpLite data because they are the same. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv4/proc.c | 78 - 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9f665b6..ed09566 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -380,11 +380,15 @@ static void icmp_put(struct seq_file *seq) */ static int snmp_seq_show(struct seq_file *seq, void *v) { - int i; + int i, c; + unsigned long buff[TCP_MIB_MAX]; + u64 buff64[IPSTATS_MIB_MAX]; struct net *net = seq->private; - seq_puts(seq, "Ip: Forwarding DefaultTTL"); + memset(buff, 0, TCP_MIB_MAX * sizeof(unsigned long)); + memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); + seq_puts(seq, "Ip: Forwarding DefaultTTL"); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); @@ -393,11 +397,16 @@ static int snmp_seq_show(struct seq_file *seq, void *v) net->ipv4.sysctl_ip_default_ttl); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); + + for_each_possible_cpu(c) { + for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + buff64[i] += snmp_get_cpu_field64( + net->mib.ip_statistics, + c, snmp4_ipstats_list[i].entry, + offsetof(struct ipstats_mib, syncp)); + } for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) - seq_printf(seq, " %llu", - snmp_fold_field64(net->mib.ip_statistics, -snmp4_ipstats_list[i].entry, -offsetof(struct ipstats_mib, syncp))); + seq_printf(seq, " %llu", buff64[i]); icmp_put(seq); /* RFC 2011 compatibility */ icmpmsg_put(seq); @@ -407,38 +416,41 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); + for_each_possible_cpu(c) { + for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + buff[i] += snmp_get_cpu_field(net->mib.tcp_statistics, + c, snmp4_tcp_list[i].entry); + } + for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) - seq_printf(seq, " %ld", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %ld", buff[i]); else - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); } + memset(buff, 0, TCP_MIB_MAX * sizeof(unsigned long)); + + for_each_possible_cpu(c) { + for (i = 0; snmp4_udp_list[i].name != NULL; i++) + buff[i] += snmp_get_cpu_field(net->mib.udp_statistics, + c, snmp4_udp_list[i].entry); + } seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdp:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udp_statistics, - snmp4_udp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udplite_statistics, - snmp4_udp_list[i].entry)); + seq_printf(seq, &qu
[RFC PATCH 3/6] proc: Reduce cache miss in sctp_snmp_seq_show
This patch exchanges the two loop for collecting the percpu statistics data. This can reduce cache misses by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/sctp/proc.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ef8ba77..085fb95 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -74,12 +74,19 @@ static const struct snmp_mib sctp_snmp_list[] = { static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; - int i; + int i, c; + unsigned long buff[SCTP_MIB_MAX]; + memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX); + + for_each_possible_cpu(c) + for (i = 0; sctp_snmp_list[i].name != NULL; i++) + buff[i] += snmp_get_cpu_field( + net->sctp.sctp_statistics, + c, sctp_snmp_list[i].entry); for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field(net->sctp.sctp_statistics, - sctp_snmp_list[i].entry)); + buff[i]); return 0; } -- 1.8.3.1
[RFC PATCH 0/6] Reduce cache miss for snmp_fold_field
In a PowerPc server with large cpu number(160), besides commit a3a773726c9f ("net: Optimize snmp stat aggregation by walking all the percpu data at once"), I watched several other snmp_fold_field callsites which will cause high cache miss rate. #My simple test case, which read from the procfs items endlessly: /***/ #include #include #include #include #include #define LINELEN 2560 int main(int argc, char **argv) { int i; int fd = -1 ; int rdsize = 0; char buf[LINELEN+1]; buf[LINELEN] = 0; memset(buf,0,LINELEN); if(1 >= argc) { printf("file name empty\n"); return -1; } fd = open(argv[1], O_RDWR, 0644); if(0 > fd){ printf("open error\n"); return -2; } for(i=0;i<0x;i++) { while(0 < (rdsize = read(fd,buf,LINELEN))){ //nothing here } lseek(fd, 0, SEEK_SET); } close(fd); return 0; } /**/ #compile and run: gcc test.c -o test perf stat -d -e cache-misses ./test /proc/net/snmp perf stat -d -e cache-misses ./test /proc/net/snmp6 perf stat -d -e cache-misses ./test /proc/net/netstat perf stat -d -e cache-misses ./test /proc/net/sctp/snmp perf stat -d -e cache-misses ./test /proc/net/xfrm_stat #test results I firstly test the correctness of data statistics. As for performance, before the patch set: Performance counter stats for 'system wide': 355911097 cache-misses [40.08%] 2356829300 L1-dcache-loads [60.04%] 355642645 L1-dcache-load-misses # 15.09% of all L1-dcache hits [60.02%] 346544541 LLC-loads [59.97%] 389763 LLC-load-misses #0.11% of all LL-cache hits[40.02%] 6.245162638 seconds time elapsed After the patch set: === Performance counter stats for 'system wide': 194992476 cache-misses [40.03%] 6718051877 L1-dcache-loads [60.07%] 194871921 L1-dcache-load-misses #2.90% of all L1-dcache hits [60.11%] 187632232 LLC-loads [60.04%] 464466 LLC-load-misses #0.25% of all LL-cache hits[39.89%] 6.868422769 seconds time elapsed The cache-miss rate can be reduced from 15% to 2.9% Jia He (6): proc: Reduce cache miss in {snmp,netstat}_seq_show proc: Reduce cache miss in snmp6_seq_show proc: Reduce cache miss in sctp_snmp_seq_show proc: Reduce cache miss in xfrm_statistics_seq_show ipv6: Remove useless parameter in __snmp6_fill_statsdev net: Suppress the "Comparison to NULL could be written" warning net/ipv4/proc.c | 110 ++- net/ipv6/addrconf.c | 12 +++--- net/ipv6/proc.c | 47 -- net/sctp/proc.c | 15 +-- net/xfrm/xfrm_proc.c | 15 +-- 5 files changed, 131 insertions(+), 68 deletions(-) -- 1.8.3.1
[RFC PATCH 4/6] proc: Reduce cache miss in xfrm_statistics_seq_show
This patch exchanges the two loop for collecting the percpu statistics data. This can reduce cache misses by going through all the items of each cpu sequentially. Signed-off-by: Jia He <hejia...@gmail.com> --- net/xfrm/xfrm_proc.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 9c4fbd8..c9df546 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -51,11 +51,20 @@ static const struct snmp_mib xfrm_mib_list[] = { static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; - int i; - for (i = 0; xfrm_mib_list[i].name; i++) + int i, c; + unsigned long buff[LINUX_MIB_XFRMMAX]; + + memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX); + + for_each_possible_cpu(c) + for (i = 0; xfrm_mib_list[i].name != NULL; i++) + buff[i] += snmp_get_cpu_field( + net->mib.xfrm_statistics, + c, xfrm_mib_list[i].entry); + for (i = 0; xfrm_mib_list[i].name != NULL; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field(net->mib.xfrm_statistics, - xfrm_mib_list[i].entry)); + buff[i]); + return 0; } -- 1.8.3.1
[RFC PATCH 6/6] net: Suppress the "Comparison to NULL
This is to suppress the checkpatch.pl warning "Comparison to NULL could be written". No functional changes here. Signed-off-by: Jia He <hejia...@gmail.com> --- net/ipv4/proc.c | 42 +- net/sctp/proc.c | 4 ++-- net/xfrm/xfrm_proc.c | 4 ++-- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ed09566..8cbf3e0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -355,22 +355,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_puts(seq, " OutMsgs OutErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } @@ -389,7 +389,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); seq_puts(seq, "Ip: Forwarding DefaultTTL"); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", @@ -399,30 +399,30 @@ static int snmp_seq_show(struct seq_file *seq, void *v) BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); for_each_possible_cpu(c) { - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) buff64[i] += snmp_get_cpu_field64( net->mib.ip_statistics, c, snmp4_ipstats_list[i].entry, offsetof(struct ipstats_mib, syncp)); } - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %llu", buff64[i]); icmp_put(seq); /* RFC 2011 compatibility */ icmpmsg_put(seq); seq_puts(seq, "\nTcp:"); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + for (i = 0; snmp4_tcp_list[i].name; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); for_each_possible_cpu(c) { - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + for (i = 0; snmp4_tcp_list[i].name; i++) buff[i] += snmp_get_cpu_field(net->mib.tcp_statistics, c, snmp4_tcp_list[i].entry); } - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { + for (i = 0; snmp4_tcp_list[i].name; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", buff[i]); @@ -433,23 +433,23 @@ static int snmp_seq_show(struct seq_file *seq, void *v) memset(buff, 0, TCP_MIB_MAX * sizeof(unsigned long)); for_each_possible_cpu(c) { - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) buff[i] += snmp_get_cpu_field(net->mib.udp_statistics, c, snmp4_udp_list[i].entry); } seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdp:"); -
[RFC PATCH 3/3] net: Remove the useless parameter of __snmp6_fill_statsdev
In commit a3a773726c9f ("net: Optimize snmp stat aggregation by walking all the percpu data at once"), __snmp6_fill_stats64 had been optimized by removing parameter items, so do the same for __snmp6_fill_statsdev. Signed-off-by: Jia He <hejia...@gmail.com> Cc: "David S. Miller" <da...@davemloft.net> Cc: Alexey Kuznetsov <kuz...@ms2.inr.ac.ru> Cc: James Morris <jmor...@namei.org> Cc: Hideaki YOSHIFUJI <yoshf...@linux-ipv6.org> Cc: Patrick McHardy <ka...@trash.net> --- net/ipv6/addrconf.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1fce613..37ea2bb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4944,18 +4944,18 @@ static inline size_t inet6_if_nlmsg_size(void) } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, - int items, int bytes) + int bytes) { int i; - int pad = bytes - sizeof(u64) * items; + int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, [0]); - for (i = 1; i < items; i++) + put_unaligned(ICMP6_MIB_MAX, [0]); + for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read([i]), [i]); - memset([items], 0, pad); + memset([ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, @@ -4987,7 +4987,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); + __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } -- 2.5.0
[RFC PATCH 2/3] net: Replace for_each_possible_cpu with for_each_online_cpu
In PowerPC server with large number cpus, the loop index in smt=1 could be reduced to 1/8 compared with smt=8. Thus cache misses can be reduced. Signed-off-by: Jia He <hejia...@gmail.com> Cc: "David S. Miller" <da...@davemloft.net> Cc: Alexey Kuznetsov <kuz...@ms2.inr.ac.ru> Cc: James Morris <jmor...@namei.org> Cc: Hideaki YOSHIFUJI <yoshf...@linux-ipv6.org> Cc: Patrick McHardy <ka...@trash.net> --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 43fa8d0..1fce613 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4969,7 +4969,7 @@ static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, buff[0] = IPSTATS_MIB_MAX; - for_each_possible_cpu(c) { + for_each_online_cpu(c) { for (i = 1; i < IPSTATS_MIB_MAX; i++) buff[i] += snmp_get_cpu_field64(mib, c, i, syncpoff); } -- 2.5.0
[RFC PATCH 0/3] net: Improve snmp6_fill_stats
This is the follow up work of commit a3a773726c9f ("net: Optimize snmp stat aggregation by walking all the percpu data at once") Jia He (3): net: Remove unnecessary memset in __snmp6_fill_stats64 net: Replace for_each_possible_cpu with for_each_online_cpu net: Remove the useless parameter of __snmp6_fill_statsdev net/ipv6/addrconf.c | 15 +++ 1 file changed, 7 insertions(+), 8 deletions(-) -- 2.5.0
[RFC PATCH 1/3] net: Remove unnecessary memset in __snmp6_fill_stats64
buff[] will be assigned later, so memset is not necessary. Signed-off-by: Jia He <hejia...@gmail.com> Cc: "David S. Miller" <da...@davemloft.net> Cc: Alexey Kuznetsov <kuz...@ms2.inr.ac.ru> Cc: James Morris <jmor...@namei.org> Cc: Hideaki YOSHIFUJI <yoshf...@linux-ipv6.org> Cc: Patrick McHardy <ka...@trash.net> --- net/ipv6/addrconf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ab3e796..43fa8d0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4967,7 +4967,6 @@ static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, BUG_ON(pad < 0); - memset(buff, 0, sizeof(buff)); buff[0] = IPSTATS_MIB_MAX; for_each_possible_cpu(c) { -- 2.5.0