[RFC PATCH] scripts: add header bloat measuring script

2018-08-18 Thread Rasmus Villemoes
With a little cooperation from fixdep, we can rather easily quantify the
header bloat phenomenon.

While computing CONFIG_ dependencies, fixdep opens all the headers used
by a given translation unit anyway, so it's rather cheap to have it
record the number and total size of those in the generated .o.cmd file.

Those lines can then be post-processed and summarized by the new
header-bloat-stat.pl script. For example, backporting this to v4.17 and
v4.18 releases shows that for a defconfig x86_64 kernel, the median
"bloat factor" (total size of translation unit)/(size of .c file)
increased from 237.7 to 239.8, and the average total translation unit
size grew by 2.5% while the average .c file only increased by
0.4%. While these numbers by themselves are not particularly alarming,
when accumulated over several releases, builds do get noticeably slower -
back at v3.0, the median bloat factor was 177.8.

Having infrastructure like this makes it easier to measure the effect
should anyone attempt something similar to the sched.h cleanup, or just
go over a subsystem trimming unused #includes from .c files (if the
script is passed one or more directories it only processes those).

On a positive note, maybe 4.19 will be a rare exception; as of
1f7a4c73a739, the median bloat factor is down to 236.0, the average .c
file has increased by 0.4% but the average total translation unit is
nevertheless 1.2% smaller, compared to v4.18.

Signed-off-by: Rasmus Villemoes 
---
For some statistics, that also include build times, for releases v3.0
through v4.15, see https://wildmoose.dk/header-bloat/ . I'm not sure
that page will remain forever, so not including the url in the commit
log.

I can certainly understand if people feel this is of too little
utility to hook into fixdep like this. It's certainly possible to do
the same statistics with external tools that just parse the .o.cmd
files themselves.

 scripts/basic/fixdep.c   | 18 +++--
 scripts/header-bloat-stat.pl | 95 
 2 files changed, 109 insertions(+), 4 deletions(-)
 create mode 100755 scripts/header-bloat-stat.pl

diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index 850966f3d602..f1dec85cf9d9 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -248,7 +248,7 @@ static void parse_config_file(const char *p)
}
 }
 
-static void *read_file(const char *filename)
+static void *read_file(const char *filename, unsigned *size)
 {
struct stat st;
int fd;
@@ -276,6 +276,8 @@ static void *read_file(const char *filename)
}
buf[st.st_size] = '\0';
close(fd);
+   if (size)
+   *size += st.st_size;
 
return buf;
 }
@@ -300,6 +302,8 @@ static void parse_dep_file(char *m, const char *target, int 
insert_extra_deps)
int saw_any_target = 0;
int is_first_dep = 0;
void *buf;
+   unsigned nheaders = 0, c_size = 0, h_size = 0;
+   unsigned *sizevar;
 
while (1) {
/* Skip any "white space" */
@@ -321,6 +325,8 @@ static void parse_dep_file(char *m, const char *target, int 
insert_extra_deps)
/* The /next/ file is the first dependency */
is_first_dep = 1;
} else if (!is_ignored_file(m, p - m)) {
+   sizevar = NULL;
+
*p = '\0';
 
/*
@@ -343,13 +349,16 @@ static void parse_dep_file(char *m, const char *target, 
int insert_extra_deps)
printf("source_%s := %s\n\n",
   target, m);
printf("deps_%s := \\\n", target);
+   sizevar = &c_size;
}
is_first_dep = 0;
} else {
printf("  %s \\\n", m);
+   sizevar = &h_size;
+   nheaders++;
}
 
-   buf = read_file(m);
+   buf = read_file(m, sizevar);
parse_config_file(buf);
free(buf);
}
@@ -373,7 +382,8 @@ static void parse_dep_file(char *m, const char *target, int 
insert_extra_deps)
do_extra_deps();
 
printf("\n%s: $(deps_%s)\n\n", target, target);
-   printf("$(deps_%s):\n", target);
+   printf("$(deps_%s):\n\n", target);
+   printf("# header-stats: %u %u %u\n", nheaders, c_size, h_size);
 }
 
 int main(int argc, char *argv[])
@@ -394,7 +404,7 @@ int main(int argc, char *argv[])
 
printf("cmd_%s := %s\n\n", target, cmdline);
 
-   buf = read_file(depfile);
+   buf = read_file(depfile, NULL);
parse_dep_file(buf, target, insert_extra_deps);
free(buf);
 
diff --git a/scripts/header-bloat-stat.pl 

[RFC PATCH] scripts: add header bloat measuring script

2018-08-18 Thread Rasmus Villemoes
With a little cooperation from fixdep, we can rather easily quantify the
header bloat phenomenon.

While computing CONFIG_ dependencies, fixdep opens all the headers used
by a given translation unit anyway, so it's rather cheap to have it
record the number and total size of those in the generated .o.cmd file.

Those lines can then be post-processed and summarized by the new
header-bloat-stat.pl script. For example, backporting this to v4.17 and
v4.18 releases shows that for a defconfig x86_64 kernel, the median
"bloat factor" (total size of translation unit)/(size of .c file)
increased from 237.7 to 239.8, and the average total translation unit
size grew by 2.5% while the average .c file only increased by
0.4%. While these numbers by themselves are not particularly alarming,
when accumulated over several releases, builds do get noticeably slower -
back at v3.0, the median bloat factor was 177.8.

Having infrastructure like this makes it easier to measure the effect
should anyone attempt something similar to the sched.h cleanup, or just
go over a subsystem trimming unused #includes from .c files (if the
script is passed one or more directories it only processes those).

On a positive note, maybe 4.19 will be a rare exception; as of
1f7a4c73a739, the median bloat factor is down to 236.0, the average .c
file has increased by 0.4% but the average total translation unit is
nevertheless 1.2% smaller, compared to v4.18.

Signed-off-by: Rasmus Villemoes 
---
For some statistics, that also include build times, for releases v3.0
through v4.15, see https://wildmoose.dk/header-bloat/ . I'm not sure
that page will remain forever, so not including the url in the commit
log.

I can certainly understand if people feel this is of too little
utility to hook into fixdep like this. It's certainly possible to do
the same statistics with external tools that just parse the .o.cmd
files themselves.

 scripts/basic/fixdep.c   | 18 +++--
 scripts/header-bloat-stat.pl | 95 
 2 files changed, 109 insertions(+), 4 deletions(-)
 create mode 100755 scripts/header-bloat-stat.pl

diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index 850966f3d602..f1dec85cf9d9 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -248,7 +248,7 @@ static void parse_config_file(const char *p)
}
 }
 
-static void *read_file(const char *filename)
+static void *read_file(const char *filename, unsigned *size)
 {
struct stat st;
int fd;
@@ -276,6 +276,8 @@ static void *read_file(const char *filename)
}
buf[st.st_size] = '\0';
close(fd);
+   if (size)
+   *size += st.st_size;
 
return buf;
 }
@@ -300,6 +302,8 @@ static void parse_dep_file(char *m, const char *target, int 
insert_extra_deps)
int saw_any_target = 0;
int is_first_dep = 0;
void *buf;
+   unsigned nheaders = 0, c_size = 0, h_size = 0;
+   unsigned *sizevar;
 
while (1) {
/* Skip any "white space" */
@@ -321,6 +325,8 @@ static void parse_dep_file(char *m, const char *target, int 
insert_extra_deps)
/* The /next/ file is the first dependency */
is_first_dep = 1;
} else if (!is_ignored_file(m, p - m)) {
+   sizevar = NULL;
+
*p = '\0';
 
/*
@@ -343,13 +349,16 @@ static void parse_dep_file(char *m, const char *target, 
int insert_extra_deps)
printf("source_%s := %s\n\n",
   target, m);
printf("deps_%s := \\\n", target);
+   sizevar = &c_size;
}
is_first_dep = 0;
} else {
printf("  %s \\\n", m);
+   sizevar = &h_size;
+   nheaders++;
}
 
-   buf = read_file(m);
+   buf = read_file(m, sizevar);
parse_config_file(buf);
free(buf);
}
@@ -373,7 +382,8 @@ static void parse_dep_file(char *m, const char *target, int 
insert_extra_deps)
do_extra_deps();
 
printf("\n%s: $(deps_%s)\n\n", target, target);
-   printf("$(deps_%s):\n", target);
+   printf("$(deps_%s):\n\n", target);
+   printf("# header-stats: %u %u %u\n", nheaders, c_size, h_size);
 }
 
 int main(int argc, char *argv[])
@@ -394,7 +404,7 @@ int main(int argc, char *argv[])
 
printf("cmd_%s := %s\n\n", target, cmdline);
 
-   buf = read_file(depfile);
+   buf = read_file(depfile, NULL);
parse_dep_file(buf, target, insert_extra_deps);
free(buf);
 
diff --git a/scripts/header-bloat-stat.pl