On Fri, Mar 29, 2024 at 8:02 PM Jørgen Kvalsvik <j...@lambda.is> wrote: > > This is a prototype for --include/--exclude flags, and I would like a > review of both the approach and architecture, and the implementation, > plus feedback on the feature itself. I did not update the manuals or > carefully extend --help, in case the interface itself needs some > revision before it can be merged. > > --- > > Add the --include and --exclude flags to gcov to control what functions > to report on. This is meant to make gcov more practical as an when > writing test suites or performing other coverage experiments, which > tends to focus on a few functions at the time. This really shines in > combination with the -t/--stdout flag. With support for more expansive > metrics in gcov like modified condition/decision coverage (MC/DC) and > path coverage, output quickly gets overwhelming without filtering. > > The approach is quite simple: filters are egrep regexes and are > evaluated left-to-right, and the last filter "wins", that is, if a > function matches an --include and a subsequent --exclude, it should not > be included in the output. The output machinery is already interacting > with the function table, which makes the json output work as expected, > and only minor changes are needed to suppress the filtered-out > functions. > > Demo: math.c > > int mul (int a, int b) { > return a * b; > } > > int sub (int a, int b) { > return a - b; > } > > int sum (int a, int b) { > return a + b; > } > > Plain matches: > > $ gcov -t math --include=sum > -: 0:Source:filter.c > -: 0:Graph:filter.gcno > -: 0:Data:- > -: 0:Runs:0 > #####: 9:int sum (int a, int b) { > #####: 10: return a + b; > > $ gcov -t math --include=mul > -: 0:Source:filter.c > -: 0:Graph:filter.gcno > -: 0:Data:- > -: 0:Runs:0 > #####: 1:int mul (int a, int b) { > #####: 2: return a * b; > > Regex match: > > $ gcov -t math --include=su > -: 0:Source:filter.c > -: 0:Graph:filter.gcno > -: 0:Data:- > -: 0:Runs:0 > #####: 5:int sub (int a, int b) { > #####: 6: return a - b; > -: 7:} > #####: 9:int sum (int a, int b) { > #####: 10: return a + b; > > And similar for exclude: > > $ gcov -t math --exclude=sum > -: 0:Source:filter.c > -: 0:Graph:filter.gcno > -: 0:Data:- > -: 0:Runs:0 > #####: 1:int mul (int a, int b) { > #####: 2: return a * b; > -: 3:} > #####: 5:int sub (int a, int b) { > #####: 6: return a - b; > > And json, for good measure: > > $ gcov -t math --include=sum --json | jq ".files[].lines[]" > { > "line_number": 9, > "function_name": "sum", > "count": 0, > "unexecuted_block": true, > "block_ids": [], > "branches": [], > "calls": [] > } > { > "line_number": 10, > "function_name": "sum", > "count": 0, > "unexecuted_block": true, > "block_ids": [ > 2 > ], > "branches": [], > "calls": [] > } > > Note that the last function gets "clipped" when lines are associated to > functions, which means the closing brace is dropped from the report. I > hope this can be fixed, but considering it is not really a part of the > function body, the gcov report is "complete". > > Matching generally work well for mangled names, as the mangled names > also have the base symbol name in it. A possible extension to the > filtering commands would be to mix it with demangling to more nicely > being able to filter specific overloads, without manually having to > mangle the interesting symbols. The g++.dg/gcov/gcov-20.C test tests the > matching of a mangled name. > > The dejagnu testing function verify-calls is somewhat minimal, but does > the job well enough. > > Why not just use grep? grep is not really sufficient, as grep is very > line oriented, and the reports that benefit the most from filtering > often span multiple lines, unpredictably.
For JSON output I suppose there's a way to "grep" without the line oriented issue? I suppose we could make the JSON more hierarchical by adding an outer function object? That said, I think this is a useful feature and thus OK for trunk if there are no other comments in about a week if you also update the gcov documentation. Thanks, Richard. > --- > gcc/gcov.cc | 101 +++++++++++++++++++++++-- > gcc/testsuite/g++.dg/gcov/gcov-19.C | 35 +++++++++ > gcc/testsuite/g++.dg/gcov/gcov-20.C | 38 ++++++++++ > gcc/testsuite/gcc.misc-tests/gcov-24.c | 20 +++++ > gcc/testsuite/gcc.misc-tests/gcov-25.c | 23 ++++++ > gcc/testsuite/gcc.misc-tests/gcov-26.c | 23 ++++++ > gcc/testsuite/gcc.misc-tests/gcov-27.c | 22 ++++++ > gcc/testsuite/lib/gcov.exp | 53 ++++++++++++- > 8 files changed, 306 insertions(+), 9 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-19.C > create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-20.C > create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-24.c > create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-25.c > create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-26.c > create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-27.c > > diff --git a/gcc/gcov.cc b/gcc/gcov.cc > index fad704eb7c9..e53765de00a 100644 > --- a/gcc/gcov.cc > +++ b/gcc/gcov.cc > @@ -46,6 +46,7 @@ along with Gcov; see the file COPYING3. If not see > #include "color-macros.h" > #include "pretty-print.h" > #include "json.h" > +#include "xregex.h" > > #include <zlib.h> > #include <getopt.h> > @@ -643,6 +644,43 @@ static int flag_counts = 0; > /* Return code of the tool invocation. */ > static int return_code = 0; > > +/* "Keep policy" when adding functions to the global function table. This > will > + be set to false when --include is used, otherwise every function should be > + added to the table. Used for --include/exclude. */ > +static bool default_keep = true; > + > +/* A 'function filter', a filter and action for determining if a function > + should be included in the output or not. Used for --include/--exclude > + filtering. */ > +struct fnfilter > +{ > + /* The (extended) compiled regex for this filter. */ > + regex_t regex; > + > + /* The action when this filter (regex) matches - if true, the function > should > + be kept, otherwise discarded. */ > + bool keep; > + > + /* Compile the regex EXPR, or exit if pattern is malformed. */ > + void compile (const char *expr) > + { > + int err = regcomp (®ex, expr, REG_NOSUB | REG_EXTENDED); > + if (err) > + { > + size_t len = regerror (err, ®ex, nullptr, 0); > + char *msg = XNEWVEC (char, len); > + regerror (err, ®ex, msg, len); > + fprintf (stderr, "Bad regular expression: %s\n", msg); > + free (msg); > + exit (EXIT_FAILURE); > + } > + } > +}; > + > +/* A collection of filter functions for including/exclude functions in the > + output. This is empty unless --include/--exclude is used. */ > +static vector<fnfilter> filters; > + > /* Forward declarations. */ > static int process_args (int, char **); > static void print_usage (int) ATTRIBUTE_NORETURN; > @@ -933,6 +971,8 @@ print_usage (int error_p) > fnotice (file, " -d, --display-progress Display progress > information\n"); > fnotice (file, " -D, --debug Display debugging > dumps\n"); > fnotice (file, " -f, --function-summaries Output summaries for > each function\n"); > + fnotice (file, " --include Include functions > matching this regex\n"); > + fnotice (file, " --exclude Exclude functions > matching this regex\n"); > fnotice (file, " -h, --help Print this help, then > exit\n"); > fnotice (file, " -j, --json-format Output JSON intermediate > format\n\ > into .gcov.json.gz file\n"); > @@ -984,6 +1024,8 @@ static const struct option options[] = > { "branch-probabilities", no_argument, NULL, 'b' }, > { "branch-counts", no_argument, NULL, 'c' }, > { "json-format", no_argument, NULL, 'j' }, > + { "include", required_argument, NULL, 'I' }, > + { "exclude", required_argument, NULL, 'E' }, > { "human-readable", no_argument, NULL, 'H' }, > { "no-output", no_argument, NULL, 'n' }, > { "long-file-names", no_argument, NULL, 'l' }, > @@ -1034,6 +1076,17 @@ process_args (int argc, char **argv) > case 'l': > flag_long_names = 1; > break; > + case 'I': > + default_keep = false; > + filters.push_back (fnfilter {}); > + filters.back ().keep = true; > + filters.back ().compile (optarg); > + break; > + case 'E': > + filters.push_back (fnfilter {}); > + filters.back ().keep = false; > + filters.back ().compile (optarg); > + break; > case 'H': > flag_human_readable_numbers = 1; > break; > @@ -1641,6 +1694,9 @@ release_structures (void) > it != functions.end (); it++) > delete (*it); > > + for (fnfilter& filter : filters) > + regfree (&filter.regex); > + > sources.resize (0); > names.resize (0); > functions.resize (0); > @@ -1868,8 +1924,6 @@ read_graph_file (void) > unsigned end_column = gcov_read_unsigned (); > > fn = new function_info (); > - functions.push_back (fn); > - ident_to_fn[ident] = fn; > > fn->m_name = function_name; > fn->ident = ident; > @@ -1883,6 +1937,17 @@ read_graph_file (void) > fn->artificial = artificial; > > current_tag = tag; > + > + bool keep = default_keep; > + for (fnfilter& fn : filters) > + if (regexec (&fn.regex, function_name, 0, nullptr, 0) == 0) > + keep = fn.keep; > + > + if (keep) > + { > + functions.push_back (fn); > + ident_to_fn[ident] = fn; > + } > } > else if (fn && tag == GCOV_TAG_BLOCKS) > { > @@ -3182,11 +3247,14 @@ output_lines (FILE *gcov_file, const source_info *src) > > unsigned line_start_group = 0; > vector<function_info *> *fns; > + unsigned filtered_line_end = !filters.empty () ? 0 : source_lines.size (); > > for (unsigned line_num = 1; line_num <= source_lines.size (); line_num++) > { > if (line_num >= src->lines.size ()) > { > + if (!filters.empty ()) > + break; > fprintf (gcov_file, "%9s:%5u", "-", line_num); > print_source_line (gcov_file, source_lines, line_num); > continue; > @@ -3205,11 +3273,26 @@ output_lines (FILE *gcov_file, const source_info *src) > for (unsigned i = 0; i < fns->size (); i++) > if ((*fns)[i]->end_line > line_start_group) > line_start_group = (*fns)[i]->end_line; > + > + /* When filtering, src->lines will be cut short for the last > + selected function. To make sure the "overlapping function" > + section is printed too, adjust the end so that it is within > + src->lines. */ > + if (line_start_group >= src->lines.size ()) > + line_start_group = src->lines.size () - 1; > + > + if (!filters.empty ()) > + filtered_line_end = line_start_group; > } > else if (fns != NULL && fns->size () == 1) > { > function_info *fn = (*fns)[0]; > output_function_details (gcov_file, fn); > + > + /* If functions are filtered, only the matching functions will > be in > + fns and there is no need for extra checking. */ > + if (!filters.empty ()) > + filtered_line_end = fn->end_line; > } > } > > @@ -3219,12 +3302,16 @@ output_lines (FILE *gcov_file, const source_info *src) > Otherwise, print the execution count before the source line. > There are 16 spaces of indentation added before the source > line so that tabs won't be messed up. */ > - output_line_beginning (gcov_file, line->exists, line->unexceptional, > - line->has_unexecuted_block, line->count, > - line_num, "=====", "#####", src->maximum_count); > + if (line_num <= filtered_line_end) > + { > + output_line_beginning (gcov_file, line->exists, line->unexceptional, > + line->has_unexecuted_block, line->count, > + line_num, "=====", "#####", > + src->maximum_count); > > - print_source_line (gcov_file, source_lines, line_num); > - output_line_details (gcov_file, line, line_num); > + print_source_line (gcov_file, source_lines, line_num); > + output_line_details (gcov_file, line, line_num); > + } > > if (line_start_group == line_num) > { > diff --git a/gcc/testsuite/g++.dg/gcov/gcov-19.C > b/gcc/testsuite/g++.dg/gcov/gcov-19.C > new file mode 100644 > index 00000000000..b10226592b4 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/gcov/gcov-19.C > @@ -0,0 +1,35 @@ > +/* { dg-options "-fprofile-arcs -ftest-coverage" } */ > +/* { dg-do run { target native } } */ > + > +/* Filtering on the function base name generally works well, because it > becomes > + an unadultered part of the symbol. */ > + > +template <typename T> > +T > +fn1 (T x) > +{ > + /* fst */ > + return x; > +} > + > +template <typename T> > +T > +fn2 (T x) > +{ > + /* snd */ > + return 2 * x; > +} > + > +int > +main () > +{ > + fn1 (2); > + fn1 (2.0); > + fn1 (2.0f); > + > + fn2 (2); > + fn2 (2.0); > + fn2 (2.0f); > +} > + > +/* { dg-final { run-gcov { filters { fn1 } { fn2 } } { --include=fn1 > gcov-19.C } } } */ > diff --git a/gcc/testsuite/g++.dg/gcov/gcov-20.C > b/gcc/testsuite/g++.dg/gcov/gcov-20.C > new file mode 100644 > index 00000000000..efd8aa72d25 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/gcov/gcov-20.C > @@ -0,0 +1,38 @@ > +/* { dg-options "-fprofile-arcs -ftest-coverage" } */ > +/* { dg-do run { target native } } */ > + > +/* Filtering also works by targeting the mangled symbol directly, but the > + subtlety is not really caught by the test framework. Matching on fn1I[df] > + prints the "overlapping blocks" of both the float and double > instantiation, > + but *not* the int instantiation. The extra {} around the --include > argument > + is quoting for tcl/dejagnu. */ > + > +template <typename T> > +T > +fn1 (T x) > +{ > + /* fst */ > + return x; > +} > + > +template <typename T> > +T > +fn2 (T x) > +{ > + /* snd */ > + return 2 * x; > +} > + > +int > +main () > +{ > + fn1 (2); > + fn1 (2.0); > + fn1 (2.0f); > + > + fn2 (2); > + fn2 (2.0); > + fn2 (2.0f); > +} > + > +/* { dg-final { run-gcov { filters { fst } { snd } } { {--include=fn1I[fd]} > gcov-20.C } } } */ > diff --git a/gcc/testsuite/gcc.misc-tests/gcov-24.c > b/gcc/testsuite/gcc.misc-tests/gcov-24.c > new file mode 100644 > index 00000000000..ce66b15c279 > --- /dev/null > +++ b/gcc/testsuite/gcc.misc-tests/gcov-24.c > @@ -0,0 +1,20 @@ > +/* { dg-options "-fprofile-arcs -ftest-coverage" } */ > +/* { dg-do run { target native } } */ > + > +int > +once (int x) > +{ > + /* fst */ > + return x; > +} > + > +int > +twice (int x) > +{ > + /* snd */ > + return x * 2; > +} > + > +int main () {} > + > +/* { dg-final { run-gcov { filters { fst } { snd main } } { --include=once > gcov-24.c } } } */ > diff --git a/gcc/testsuite/gcc.misc-tests/gcov-25.c > b/gcc/testsuite/gcc.misc-tests/gcov-25.c > new file mode 100644 > index 00000000000..05cfc0f1d82 > --- /dev/null > +++ b/gcc/testsuite/gcc.misc-tests/gcov-25.c > @@ -0,0 +1,23 @@ > +/* { dg-options "-fprofile-arcs -ftest-coverage" } */ > +/* { dg-do run { target native } } */ > + > +/* Filters are considered in order with latest-wins, so if a function is > + included and later excluded it should not show up. */ > + > +int > +fn1 (int x) > +{ > + /* fst */ > + return x; > +} > + > +int > +fn2 (int x) > +{ > + /* snd */ > + return x * 2; > +} > + > +int main () {} > + > +/* { dg-final { run-gcov { filters { snd } { fst main } } { --include=fn > --exclude=1 gcov-25.c } } } */ > diff --git a/gcc/testsuite/gcc.misc-tests/gcov-26.c > b/gcc/testsuite/gcc.misc-tests/gcov-26.c > new file mode 100644 > index 00000000000..246e7b3ef57 > --- /dev/null > +++ b/gcc/testsuite/gcc.misc-tests/gcov-26.c > @@ -0,0 +1,23 @@ > +/* { dg-options "-fprofile-arcs -ftest-coverage" } */ > +/* { dg-do run { target native } } */ > + > +/* Filters are considered in order with latest-wins, so if a function is > + excluded and later included it should show up. */ > + > +int > +fn1 (int x) > +{ > + /* fst */ > + return x; > +} > + > +int > +fn2 (int x) > +{ > + /* snd */ > + return x * 2; > +} > + > +int main () {} > + > +/* { dg-final { run-gcov { filters { fst snd } { main } } { --exclude=1 > --include=fn gcov-26.c } } } */ > diff --git a/gcc/testsuite/gcc.misc-tests/gcov-27.c > b/gcc/testsuite/gcc.misc-tests/gcov-27.c > new file mode 100644 > index 00000000000..53619472c01 > --- /dev/null > +++ b/gcc/testsuite/gcc.misc-tests/gcov-27.c > @@ -0,0 +1,22 @@ > +/* { dg-options "-fprofile-arcs -ftest-coverage" } */ > +/* { dg-do run { target native } } */ > + > +/* If only --exclude is used, other functions should be used by default. */ > + > +int > +fn1 (int x) > +{ > + /* fst */ > + return x; > +} > + > +int > +fn2 (int x) > +{ > + /* snd */ > + return x * 2; > +} > + > +int main () {} > + > +/* { dg-final { run-gcov { filters { fst snd } { main } } { --exclude=main > gcov-27.c } } } */ > diff --git a/gcc/testsuite/lib/gcov.exp b/gcc/testsuite/lib/gcov.exp > index fe6cd9635db..586c2cd1b40 100644 > --- a/gcc/testsuite/lib/gcov.exp > +++ b/gcc/testsuite/lib/gcov.exp > @@ -249,6 +249,44 @@ proc verify-calls { testname testcase file } { > return $failed > } > > +# Verify that report filtering includes and excludes the right functions. > +proc verify-filters { testname testcase file expected unexpected } { > + set fd [open $file r] > + > + set seen {} > + set ex [concat $expected $unexpected] > + > + while { [gets $fd line] >= 0 } { > + foreach sym $ex { > + if [regexp "$sym" "$line"] { > + lappend seen $sym > + } > + } > + } > + > + set seen [lsort -unique $seen] > + > + set expected [lmap key $expected { > + if { $key in $seen } continue > + set key > + }] > + set unexpected [lmap key $unexpected { > + if { $key ni $seen } continue > + set key > + }] > + > + foreach sym $expected { > + fail "Did not see expected symbol '$sym'" > + } > + > + foreach sym $unexpected { > + fail "Found unexpected symbol '$sym'" > + } > + > + close $fd > + return [expr [llength $expected] + [llength $unexpected]] > +} > + > proc gcov-pytest-format-line { args } { > global subdir > > @@ -323,6 +361,7 @@ proc run-gcov { args } { > set gcov_verify_branches 0 > set gcov_verify_lines 1 > set gcov_verify_intermediate 0 > + set gcov_verify_filters 0 > set gcov_remove_gcda 0 > set xfailed 0 > > @@ -331,11 +370,16 @@ proc run-gcov { args } { > set gcov_verify_calls 1 > } elseif { $a == "branches" } { > set gcov_verify_branches 1 > + } elseif { [lindex $a 0] == "filters" } { > + set gcov_verify_filters 1 > + set verify_filters_expected [lindex $a 1] > + set verify_filters_unexpected [lindex $a 2] > } elseif { $a == "intermediate" } { > set gcov_verify_intermediate 1 > set gcov_verify_calls 0 > set gcov_verify_branches 0 > set gcov_verify_lines 0 > + set gcov_verify_filters 0 > } elseif { $a == "remove-gcda" } { > set gcov_remove_gcda 1 > } elseif { $gcov_args == "" } { > @@ -415,15 +459,20 @@ proc run-gcov { args } { > } else { > set ifailed 0 > } > + if { $gcov_verify_filters } { > + set ffailed [verify-filters $testname $testcase $testcase.gcov > $verify_filters_expected $verify_filters_unexpected] > + } else { > + set ffailed 0 > + } > > # Report whether the gcov test passed or failed. If there were > # multiple failures then the message is a summary. > - set tfailed [expr $lfailed + $bfailed + $cfailed + $ifailed] > + set tfailed [expr $lfailed + $bfailed + $cfailed + $ifailed + $ffailed] > if { $xfailed } { > setup_xfail "*-*-*" > } > if { $tfailed > 0 } { > - fail "$testname gcov: $lfailed failures in line counts, $bfailed in > branch percentages, $cfailed in return percentages, $ifailed in intermediate > format" > + fail "$testname gcov: $lfailed failures in line counts, $bfailed in > branch percentages, $cfailed in return percentages, $ifailed in intermediate > format, $ffailed failed in filters" > if { $xfailed } { > clean-gcov $testcase > } > -- > 2.30.2 >