>From 04bcab70c7dcb9c1bc7ca49508b5d7cbd5aeaa1a Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler git" <f...@elastic.org> Date: Fri, 18 Sep 2020 13:03:01 -0400 Subject: [PATCH] debuginfod: store only canonicalized sref pathnames in database
From: Frank Ch. Eigler <f...@redhat.com> Since PR25548, we let debuginfod answer /buildid/HEX/source/PATH queries with both canonicalized and raw PATHs. It canonicalizes incoming paths, but until now it still stored the raw paths in the database too. This near-dupe storage is not needed, since the queries would always find the canonicalized version too, so stop doing that. This saves database space/time. Signed-off-by: Frank Ch. Eigler <f...@redhat.com> --- debuginfod/ChangeLog | 5 +++ debuginfod/debuginfod.cxx | 69 +++++++++++++-------------------------- 2 files changed, 28 insertions(+), 46 deletions(-) diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index a8e0ac5ecf0f..8cb89967e9d1 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,8 @@ +2020-09-18 Frank Ch. Eigler <f...@redhat.com> + + * debuginfod.cxx (scan_source_file, archive_classify): Store only + canonicalized file names in sdef & sref records in the database. + 2020-09-08 Mark Wielaard <m...@klomp.org> * Makefile.am (BUILD_STATIC): Include libcurl_LIBS in libdebuginfod diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 5621030292e8..140b7789de3b 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -1505,6 +1505,8 @@ handle_buildid (MHD_Connection* conn, "order by sharedprefix(source0,source0ref) desc, mtime desc"); pp->reset(); pp->bind(1, buildid); + // NB: we don't store the non-canonicalized path names any more, but old databases + // might have them (and no canon ones), so we keep searching for both. 
pp->bind(2, suffix); pp->bind(3, canon_pathname(suffix)); } @@ -2254,41 +2256,27 @@ scan_source_file (const string& rps, const stat_t& st, .bind(1, srps) .step_ok_done(); - // register the dwarfsrc name in the interning table too + // PR25548: store canonicalized dwarfsrc path + string dwarfsrc_canon = canon_pathname (dwarfsrc); + if (dwarfsrc_canon != dwarfsrc) + { + if (verbose > 3) + obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl; + } + ps_upsert_files .reset() - .bind(1, dwarfsrc) + .bind(1, dwarfsrc_canon) .step_ok_done(); ps_upsert_s .reset() .bind(1, buildid) - .bind(2, dwarfsrc) + .bind(2, dwarfsrc_canon) .bind(3, srps) .bind(4, sfs.st_mtime) .step_ok_done(); - // PR25548: also store canonicalized source path - string dwarfsrc_canon = canon_pathname (dwarfsrc); - if (dwarfsrc_canon != dwarfsrc) - { - if (verbose > 3) - obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl; - - ps_upsert_files - .reset() - .bind(1, dwarfsrc_canon) - .step_ok_done(); - - ps_upsert_s - .reset() - .bind(1, buildid) - .bind(2, dwarfsrc_canon) - .bind(3, srps) - .bind(4, sfs.st_mtime) - .step_ok_done(); - } - inc_metric("found_sourcerefs_total","source","files"); } } @@ -2439,37 +2427,26 @@ archive_classify (const string& rps, string& archive_extension, continue; } + // PR25548: store canonicalized source path + const string& dwarfsrc = s; + string dwarfsrc_canon = canon_pathname (dwarfsrc); + if (dwarfsrc_canon != dwarfsrc) + { + if (verbose > 3) + obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl; + } + ps_upsert_files .reset() - .bind(1, s) + .bind(1, dwarfsrc_canon) .step_ok_done(); ps_upsert_sref .reset() .bind(1, buildid) - .bind(2, s) + .bind(2, dwarfsrc_canon) .step_ok_done(); - // PR25548: also store canonicalized source path - const string& dwarfsrc = s; - string dwarfsrc_canon = canon_pathname (dwarfsrc); - if (dwarfsrc_canon != dwarfsrc) - { - if 
(verbose > 3) - obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl; - - ps_upsert_files - .reset() - .bind(1, dwarfsrc_canon) - .step_ok_done(); - - ps_upsert_sref - .reset() - .bind(1, buildid) - .bind(2, dwarfsrc_canon) - .step_ok_done(); - } - fts_sref ++; } } -- 2.26.2