Hi Omar, On Mon, 2024-02-26 at 11:32 -0800, Omar Sandoval wrote: > Meta uses DWARF package files for our large, statically-linked C++ > applications. Some of our largest applications have more than 4GB in > .debug_info.dwo, but the section offsets in .debug_cu_index and > .debug_tu_index are 32 bits; see the discussion here [1]. We > implemented a workaround/extension for this in LLVM. Implement the > equivalent in libdw. > > To test this, we need files with more than 4GB in .debug_info.dwo. I > created these artificially by editing GCC's assembly output. They > compress down to 6KB. I test them from run-large-elf-file.sh to take > advantage of the existing checks for large file support. > > 1: https://discourse.llvm.org/t/dwarf-dwp-4gb-limit/63902. > > * libdw/dwarf_end.c (dwarf_package_index_free): New function. > * tests/testfile-dwp-4-cu-index-overflow.bz2: New test file. > * tests/testfile-dwp-4-cu-index-overflow.dwp.bz2: New test file. > * tests/testfile-dwp-5-cu-index-overflow.bz2: New test file. > * tests/testfile-dwp-5-cu-index-overflow.dwp.bz2: New test file. > * tests/testfile-dwp-cu-index-overflow.source: New file. > * tests/run-large-elf-file.sh: Check > testfile-dwp-5-cu-index-overflow and > testfile-dwp-4-cu-index-overflow.
The hack is kind of horrible, but given that this doesn't really impact "normal" dwp files and it does work with clang/lldb, let's just support it too. > Signed-off-by: Omar Sandoval <osan...@fb.com> > --- > libdw/dwarf_cu_dwp_section_info.c | 147 ++++++++++++++- > libdw/dwarf_end.c | 15 +- > libdw/libdwP.h | 3 + > tests/Makefile.am | 7 +- > tests/run-large-elf-file.sh | 174 ++++++++++++++++++ > tests/testfile-dwp-4-cu-index-overflow.bz2 | Bin 0 -> 4490 bytes > .../testfile-dwp-4-cu-index-overflow.dwp.bz2 | Bin 0 -> 5584 bytes > tests/testfile-dwp-5-cu-index-overflow.bz2 | Bin 0 -> 4544 bytes > .../testfile-dwp-5-cu-index-overflow.dwp.bz2 | Bin 0 -> 5790 bytes > tests/testfile-dwp-cu-index-overflow.source | 86 +++++++++ > 10 files changed, 426 insertions(+), 6 deletions(-) > create mode 100755 tests/testfile-dwp-4-cu-index-overflow.bz2 > create mode 100644 tests/testfile-dwp-4-cu-index-overflow.dwp.bz2 > create mode 100755 tests/testfile-dwp-5-cu-index-overflow.bz2 > create mode 100644 tests/testfile-dwp-5-cu-index-overflow.dwp.bz2 > create mode 100644 tests/testfile-dwp-cu-index-overflow.source > > diff --git a/libdw/dwarf_cu_dwp_section_info.c > b/libdw/dwarf_cu_dwp_section_info.c > index 298f36f9..3d11c87a 100644 > --- a/libdw/dwarf_cu_dwp_section_info.c > +++ b/libdw/dwarf_cu_dwp_section_info.c > @@ -30,6 +30,8 @@ > # include <config.h> > #endif > > +#include <assert.h> > + > #include "libdwP.h" > > static Dwarf_Package_Index * > @@ -110,7 +112,9 @@ __libdw_read_package_index (Dwarf *dbg, bool tu) > > index->dbg = dbg; > /* Set absent sections to UINT32_MAX. 
*/ > - memset (index->sections, 0xff, sizeof (index->sections)); > + for (size_t i = 0; > + i < sizeof (index->sections) / sizeof (index->sections[0]); i++) > + index->sections[i] = UINT32_MAX; > for (size_t i = 0; i < section_count; i++) > { > uint32_t section = read_4ubyte_unaligned (dbg, sections + i * 4); > @@ -161,6 +165,7 @@ __libdw_read_package_index (Dwarf *dbg, bool tu) > index->indices = indices; > index->section_offsets = section_offsets; > index->section_sizes = section_sizes; > + index->debug_info_offsets = NULL; > > return index; > } > @@ -177,6 +182,137 @@ __libdw_package_index (Dwarf *dbg, bool tu) > if (index == NULL) > return NULL; > > + /* Offsets in the section offset table are 32-bit unsigned integers. In > + practice, the .debug_info.dwo section for very large executables can be > + larger than 4GB. GNU dwp as of binutils 2.41 and llvm-dwp before LLVM > 15 > + both accidentally truncate offsets larger than 4GB. > + > + LLVM 15 detects the overflow and errors out instead; see LLVM commit > + f8df8114715b ("[DWP][DWARF] Detect and error on debug info offset > + overflow"). However, lldb in LLVM 16 supports using dwp files with > + truncated offsets by recovering them directly from the unit headers in > the > + .debug_info.dwo section; see LLVM commit c0db06227721 ("[DWARFLibrary] > Add > + support to re-construct cu-index"). Since LLVM 17, the overflow error > can > + be turned into a warning instead; see LLVM commit 53a483cee801 ("[DWP] > add > + overflow check for llvm-dwp tools if offset overflow"). > + > + LLVM's support for > 4GB offsets is effectively an extension to the > DWARF > + package file format, which we implement here. The strategy is to walk > the > + unit headers in .debug_info.dwo in lockstep with the DW_SECT_INFO > columns > + in the section offset tables. 
As long as they are in the same order > + (which they are in practice for both GNU dwp and llvm-dwp), we can > + correlate the truncated offset and produce a corrected array of offsets. > + > + Note that this will be fixed properly in DWARF 6: > + https://dwarfstd.org/issues/220708.2.html. */ > + if (index->sections[DW_SECT_INFO - 1] != UINT32_MAX > + && dbg->sectiondata[IDX_debug_info]->d_size > UINT32_MAX) > + { > + Dwarf_Package_Index *cu_index, *tu_index = NULL; > + if (tu) > + { > + tu_index = index; > + assert (dbg->cu_index == NULL); > + cu_index = __libdw_read_package_index (dbg, false); > + if (cu_index == NULL) > + { > + free(index); > + return NULL; > + } > + } > + else > + { > + cu_index = index; > + if (dbg->sectiondata[IDX_debug_tu_index] != NULL > + && dbg->sectiondata[IDX_debug_types] == NULL) > + { > + assert (dbg->tu_index == NULL); > + tu_index = __libdw_read_package_index (dbg, true); > + if (tu_index == NULL) > + { > + free(index); > + return NULL; > + } > + } > + } > + > + cu_index->debug_info_offsets = malloc (cu_index->unit_count > + * sizeof (Dwarf_Off)); > + if (cu_index->debug_info_offsets == NULL) > + { > + free (tu_index); > + free (cu_index); > + __libdw_seterrno (DWARF_E_NOMEM); > + return NULL; > + } > + if (tu_index != NULL) > + { > + tu_index->debug_info_offsets = malloc (tu_index->unit_count > + * sizeof (Dwarf_Off)); > + if (tu_index->debug_info_offsets == NULL) > + { > + free (tu_index); > + free (cu_index->debug_info_offsets); > + free (cu_index); > + __libdw_seterrno (DWARF_E_NOMEM); > + return NULL; > + } > + } > + > + Dwarf_Off off = 0; > + uint32_t cui = 0, tui = 0; > + uint32_t cu_count = cu_index->unit_count; > + const unsigned char *cu_offset > + = cu_index->section_offsets + cu_index->sections[DW_SECT_INFO - 1] * 4; > + uint32_t tu_count = 0; > + const unsigned char *tu_offset; > + if (tu_index != NULL) > + { > + tu_count = tu_index->unit_count; > + tu_offset = tu_index->section_offsets > + + 
tu_index->sections[DW_SECT_INFO - 1] * 4; > + } > + while (cui < cu_count || tui < tu_count) > + { > + Dwarf_Off next_off; > + uint8_t unit_type; > + if (__libdw_next_unit (dbg, false, off, &next_off, NULL, NULL, > + &unit_type, NULL, NULL, NULL, NULL, NULL) > + != 0) > + { > + not_sorted: > + free (cu_index->debug_info_offsets); > + cu_index->debug_info_offsets = NULL; > + if (tu_index != NULL) > + { > + free (tu_index->debug_info_offsets); > + tu_index->debug_info_offsets = NULL; > + } > + break; > + } > + if (unit_type != DW_UT_split_type && cui < cu_count) > + { > + if ((off & UINT32_MAX) != read_4ubyte_unaligned (dbg, cu_offset)) > + goto not_sorted; > + cu_index->debug_info_offsets[cui++] = off; > + cu_offset += cu_index->section_count * 4; > + } > + else if (unit_type == DW_UT_split_type && tui < tu_count) > + { > + if ((off & UINT32_MAX) != read_4ubyte_unaligned (dbg, tu_offset)) > + goto not_sorted; > + tu_index->debug_info_offsets[tui++] = off; > + tu_offset += tu_index->section_count * 4; > + } > + off = next_off; > + } > + > + if (tu) > + dbg->cu_index = cu_index; > + else if (tu_index != NULL) > + dbg->tu_index = tu_index; > + } > + > if (tu) > dbg->tu_index = index; > else This looks correct, but gcc noticed a path to use tu_offset (and tu_index) if they weren't initialized or NULL: In file included from /home/mark/src/elfutils/libdw/libdwP.h:684, from /home/mark/src/elfutils/libdw/dwarf_cu_dwp_section_info.c:35: In function ‘read_4ubyte_unaligned_1’, inlined from ‘__libdw_package_index’ at /home/mark/src/elfutils/libdw/dwarf_cu_dwp_section_info.c:302:34: /home/mark/src/elfutils/libdw/memory-access.h:291:12: error: ‘tu_offset’ may be used uninitialized [-Werror=maybe-uninitialized] 291 | return up->u4; | ~~^~~~ /home/mark/src/elfutils/libdw/dwarf_cu_dwp_section_info.c: In function ‘__libdw_package_index’: /home/mark/src/elfutils/libdw/dwarf_cu_dwp_section_info.c:268:28: note: ‘tu_offset’ was declared here 268 | const unsigned char *tu_offset; | 
^~~~~~~~~ cc1: all warnings being treated as errors I couldn't immediately disprove gcc here, so I think it is a good idea to add an explicit check for tu_index != NULL. diff --git a/libdw/dwarf_cu_dwp_section_info.c b/libdw/dwarf_cu_dwp_section_info.c index 3d11c87a..9fdc15bf 100644 --- a/libdw/dwarf_cu_dwp_section_info.c +++ b/libdw/dwarf_cu_dwp_section_info.c @@ -297,7 +297,8 @@ __libdw_package_index (Dwarf *dbg, bool tu) cu_index->debug_info_offsets[cui++] = off; cu_offset += cu_index->section_count * 4; } - else if (unit_type == DW_UT_split_type && tui < tu_count) + else if (unit_type == DW_UT_split_type && tu_index != NULL + && tui < tu_count) { if ((off & UINT32_MAX) != read_4ubyte_unaligned (dbg, tu_offset)) goto not_sorted; Which makes gcc happy again. > @@ -244,8 +380,13 @@ __libdw_dwp_section_info (Dwarf_Package_Index *index, > uint32_t unit_row, > size_t i = (size_t)(unit_row - 1) * index->section_count > + index->sections[section - 1]; > if (offsetp != NULL) > - *offsetp = read_4ubyte_unaligned (index->dbg, > - index->section_offsets + i * 4); > + { > + if (section == DW_SECT_INFO && index->debug_info_offsets != NULL) > + *offsetp = index->debug_info_offsets[unit_row - 1]; > + else > + *offsetp = read_4ubyte_unaligned (index->dbg, > + index->section_offsets + i * 4); > + } > if (sizep != NULL) > *sizep = read_4ubyte_unaligned (index->dbg, > index->section_sizes + i * 4); OK. 
> diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c > index 78224ddb..ed8d27be 100644 > --- a/libdw/dwarf_end.c > +++ b/libdw/dwarf_end.c > @@ -40,6 +40,17 @@ > #include "cfi.h" > > > +static void > +dwarf_package_index_free (Dwarf_Package_Index *index) > +{ > + if (index != NULL) > + { > + free (index->debug_info_offsets); > + free (index); > + } > +} > + > + > static void > noop_free (void *arg __attribute__ ((unused))) > { > @@ -79,8 +90,8 @@ dwarf_end (Dwarf *dwarf) > { > if (dwarf != NULL) > { > - free (dwarf->tu_index); > - free (dwarf->cu_index); > + dwarf_package_index_free (dwarf->tu_index); > + dwarf_package_index_free (dwarf->cu_index); > > if (dwarf->cfi != NULL) > /* Clean up the CFI cache. */ OK. > diff --git a/libdw/libdwP.h b/libdw/libdwP.h > index 1a0a4df3..6018399c 100644 > --- a/libdw/libdwP.h > +++ b/libdw/libdwP.h > @@ -371,6 +371,9 @@ typedef struct Dwarf_Package_Index_s > const unsigned char *indices; > const unsigned char *section_offsets; > const unsigned char *section_sizes; > + /* If DW_SECT_INFO section offsets were truncated to 32 bits, recovered > + 64-bit offsets. */ > + Dwarf_Off *debug_info_offsets; > } Dwarf_Package_Index; > > /* CU representation. */ OK. > diff --git a/tests/Makefile.am b/tests/Makefile.am > index 3f80c451..98131a6b 100644 > --- a/tests/Makefile.am > +++ b/tests/Makefile.am > @@ -641,7 +641,12 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh > \ > testfile-dwp-4.bz2 testfile-dwp-4.dwp.bz2 \ > testfile-dwp-4-strict.bz2 testfile-dwp-4-strict.dwp.bz2 \ > testfile-dwp-5.bz2 testfile-dwp-5.dwp.bz2 testfile-dwp.source \ > - run-cu-dwp-section-info.sh run-declfiles.sh > + run-cu-dwp-section-info.sh run-declfiles.sh \ > + testfile-dwp-5-cu-index-overflow \ > + testfile-dwp-5-cu-index-overflow.dwp \ > + testfile-dwp-4-cu-index-overflow \ > + testfile-dwp-4-cu-index-overflow.dwp \ > + testfile-dwp-cu-index-overflow.source > > > if USE_VALGRIND You mean the .bz2 files here. Fixed. 
diff --git a/tests/Makefile.am b/tests/Makefile.am index 98131a6b..9141074f 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -642,10 +642,10 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \ testfile-dwp-4-strict.bz2 testfile-dwp-4-strict.dwp.bz2 \ testfile-dwp-5.bz2 testfile-dwp-5.dwp.bz2 testfile-dwp.source \ run-cu-dwp-section-info.sh run-declfiles.sh \ - testfile-dwp-5-cu-index-overflow \ - testfile-dwp-5-cu-index-overflow.dwp \ - testfile-dwp-4-cu-index-overflow \ - testfile-dwp-4-cu-index-overflow.dwp \ + testfile-dwp-5-cu-index-overflow.bz2 \ + testfile-dwp-5-cu-index-overflow.dwp.bz2 \ + testfile-dwp-4-cu-index-overflow.bz2 \ + testfile-dwp-4-cu-index-overflow.dwp.bz2 \ testfile-dwp-cu-index-overflow.source Thanks for the new tests. Pushed with the fixlets above. Cheers, Mark