Hi Ravi, My apologies for being slightly confused. I think what you are proposing with your patch is the right thing. I am just slightly confused because I thought the current code already did what you are proposing. So the current code doesn't actually do what I thought it did :)
I have attached a simplified version of what I believe systemtap is doing. It is a two step process. First it calls dwfl_linux_kernel_report_offline to get the kernel file itself. Second it gets the Dwarf for the kernel module, which should trigger find_debuginfo to find the separate debug file, if needed. It still doesn't show the full search path (for that we should hack find_debuginfo_in_path and try_open in libdwfl/find-debuginfo.c a bit), but it should be a start to better understand why the current search isn't finding the separate kernel debug file. On Wed, 2016-02-17 at 13:50 +0530, Ravi Bangoria wrote: > On Tuesday 16 February 2016 10:15 PM, Mark Wielaard wrote: > >> This patch adds functionality to search for file without any extension > >> followed by searching file having .debug extension. > > Thanks for the analysis. I do have a question though. > > > > The reason we are looking for the vmlinux-3.13.0-76-generic.debug > > filename instead of the plain vmlinux-3.13.0-76-generic filename seems > > to be because /boot/vmlinux contains a .gnu_debuginfo link with the name > > of the debug file. But that name doesn't match? > > So I should put it this way: > > 'Ubuntu on powerpc' installs 'stripped' kernel 'without compressing it' > as /boot/vmlinux. Most of the time it's assumed that vmlinux will have > .debuginfo of .debuglink section. But this is not true here. See outputs > below. > > > What does eu-readelf --strings=.gnu_debuglink /boot/vmlinux* say? > > # eu-readelf --strings=.gnu_debuglink /boot/vmlinux* > > /boot/vmlinux: > > eu-readelf: > section '.gnu_debuglink' does not exist > > /boot/vmlinux-3.13.0-76-generic: > > eu-readelf: > section '.gnu_debuglink' does not exist > > /boot/vmlinux-4.2.0-27-generic: > > eu-readelf: > section '.gnu_debuglink' does not exist > > /boot/vmlinux.old: > > eu-readelf: > section '.gnu_debuglink' does not exist Aha, that is indeed not what I expected. Thanks. 
> > Would it make sense to not put in the "correct" (without .debug suffix) > > debugfile name in .gnu_debuglink > > Yes, that should solve the problem. But changing structure of ubuntu > kernel elf to solve stap problem doesn't look feasible to me. IIRC no > other distro does that. I've confirmed this on RHEL. RHEL distro-kernel > does not have .debuglink section. OK, thanks. That is actually not what I expected. I had expected that if there is no .debuglink section we already search for both the plain and .debug file name. Apparently I got this wrong. > Then you may ask how this is working in RHEL then. RHEL / Fedora on > powerpc installs stripped uncompressed kernel as /boot/vmlinuz. > elfutils looking for vmlinux, doesn't find it from /boot/, so it continues > looking for file without any suffix(.debug) and it gets proper file. This is > the case with Ubuntu x86 as well, except on x86, /boot/vmlinuz is > compressed stripped image. Aha. So Fedora/RHEL "cheats" by calling it vmlinuz while it isn't actually compressed. And another thing I missed is that the vmlinux file is the full unstripped kernel image, even though it is put under /usr/debug... So it contains the whole kernel plus the debuginfo. > So I thought about this solution as well, renaming /boot/vmlinux to > /boot/vmlinuz will solve the problem. But this is not appropriate. Because > vmlinu*z* is for compressed image and we are just renaming file without > compressing to solve stap problem. Yeah, it is a bit cheating. > > or completely leaving .gnu_debuginfo > > out (or is nothing found then?) > > Not sure what does this mean. Can you please explain a bit more. Sorry, silly confusing typo. I meant leaving out .gnu_debuglink. But it is already not there. Thanks, Mark
// gcc -Wall -o dwfl_find_kernel -ldw dwfl_find_kernel.c #include <inttypes.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/utsname.h> #include <elfutils/libdwfl.h> static char *kernel_file = NULL; static int setup_report_kernel (const char* modname, const char* filename) { printf ("setup report modname: %s, filename: %s\n", modname, filename); /* We are only interested in the kernel. */ if (modname != NULL && strcmp (modname, "kernel") == 0) { /* We will get a null filename for modname == "kernel" if the kernel couldn't be found. We still want to stop, not interested in any modules being found. */ if (filename == NULL) return -1; /* Copy the kernel file name and tell dwfl we want this module. */ kernel_file = strdup (filename); return 1; } /* Once we have seen the kernel we don't need any more modules. We are not interested in any kernel modules. */ if (kernel_file != NULL) return -1; /* We don't need this module. But we haven't seen the kernel yet, continue. 
*/ return 0; } static char *kernel_dwarf_file = NULL; static int getdwarf_kernel (Dwfl_Module *mod, void **userdata, const char *name, Dwarf_Addr base, Dwarf *dw, Dwarf_Addr bias, void *arg) { printf ("getdwarf name: %s\n", name); if (name != NULL && strcmp (name, "kernel") == 0) { if (dw == NULL) { printf ("kernel without DWARF\n"); return DWARF_CB_ABORT; } const char *mainfile, *debugfile; dwfl_module_info (mod, NULL, NULL, NULL, NULL, NULL, &mainfile, &debugfile); printf ("mainfile: %s, debugfile: %s\n", mainfile, debugfile); if (debugfile != NULL) kernel_dwarf_file = strdup (debugfile); else kernel_dwarf_file = strdup (mainfile); } return DWARF_CB_OK; } int main (int argc, char **argv) { char *release; if (argc > 1) release = argv[1]; else { struct utsname uts; if (uname (&uts) != 0) { perror ("uname failed"); return -1; } release = uts.release; } char *debuginfo_path; if (argc > 2) debuginfo_path = argv[2]; else debuginfo_path = "+:.debug:/usr/lib/debug"; printf ("Finding kernel release: %s\n", release); printf ("Debuginfo path: %s\n", debuginfo_path); Dwfl_Callbacks kernel_callbacks = { dwfl_linux_kernel_find_elf, dwfl_standard_find_debuginfo, dwfl_offline_section_address, (char **) & debuginfo_path }; Dwfl *dwfl = dwfl_begin (&kernel_callbacks); if (dwfl == NULL) { printf ("dwfl_begin: %s\n", dwfl_errmsg (-1)); return -1; } dwfl_report_begin (dwfl); dwfl_linux_kernel_report_offline (dwfl, release, &setup_report_kernel); dwfl_report_end(dwfl, NULL, NULL); if (kernel_file == NULL) { printf ("Couldn't find kernel\n"); return -1; } printf ("Found kernel file: %s\n", kernel_file); ptrdiff_t p = 0; do p = dwfl_getdwarf (dwfl, &getdwarf_kernel, NULL, p); while (p > 0); if (kernel_dwarf_file == NULL) { printf ("Couldn't get kernel DWARF\n"); return -1; } printf ("Found kernel DWARF file: %s\n", kernel_dwarf_file); dwfl_end (dwfl); free (kernel_file); free (kernel_dwarf_file); return 0; }