On Sat, Mar 21, 2020 at 01:30:55AM +0100, Mark Wielaard wrote:
> Hi Omar,
>
> On Wed, Mar 18, 2020 at 01:18:51PM -0700, Omar Sandoval wrote:
> > __elf_getphdrnum_rdlock() handles PN_XNUM by getting sh_info from
> > elf->state.elf{32,64}.scns.data[0].shdr.e{32,64}. However, that is only
> > a cache that may or may not have been populated by elf_begin() or
> > elf{32,64}_getshdr(); if it hasn't been cached yet, elf_getphdrnum()
> > returns 65535 (the value of PN_XNUM) instead. We should explicitly get
> > the shdr if it isn't cached.
>
> I believe this analysis is correct. But how did you find this? This
> seems to only happen if e_phnum was PN_XNUM and for some reason the
> scns cache wasn't initialized. Do you happen to have a testcase?
I encountered this in drgn on a vmcore for a large server created by
makedumpfile, but I was able to put together a minimal reproducer.
Generate the ELF file with this Python script:
---
#!/usr/bin/env python3
import struct
import sys
# Number of program headers to emit.  It is deliberately larger than 0xFFFF,
# so it cannot be stored in the 16-bit e_phnum field: the file header carries
# PN_XNUM there and the real count goes into sh_info of section header 0.
phnum = 66000

out = sys.stdout.buffer

# ELF64 file header (64 bytes, little-endian).
out.write(
    struct.pack(
        "<16BHHIQQQIHHHHHH",
        # EI_MAG
        *b"\x7fELF",
        # EI_CLASS = ELFCLASS64
        2,
        # EI_DATA = ELFDATA2LSB
        1,
        # EI_VERSION
        1,
        # EI_OSABI = ELFOSABI_SYSV
        0,
        # EI_ABIVERSION
        0,
        # EI_PAD
        *bytes(7),
        # e_type = ET_CORE
        4,
        # e_machine = EM_X86_64
        62,
        # e_version
        1,
        # e_entry
        0,
        # e_phoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr)
        128,
        # e_shoff = sizeof(Elf64_Ehdr)
        64,
        # e_flags
        0,
        # e_ehsize
        64,
        # e_phentsize
        56,
        # e_phnum = PN_XNUM
        0xFFFF,
        # e_shentsize
        64,
        # e_shnum
        1,
        # e_shstrndx
        0,
    )
)

# Section header 0 (64 bytes): all zero except sh_info, which holds the real
# program header count.
out.write(
    struct.pack(
        "<IIQQQQIIQQ",
        # sh_name
        0,
        # sh_type = SHT_NULL
        0,
        # sh_flags
        0,
        # sh_addr
        0,
        # sh_offset
        0,
        # sh_size
        0,
        # sh_link
        0,
        # sh_info
        phnum,
        # sh_addralign
        0,
        # sh_entsize
        0,
    )
)

# Program headers (56 bytes each).  The format is compiled once because it is
# reused for every one of the phnum entries.
phdr = struct.Struct("<IIQQQQQQ")
for i in range(phnum):
    out.write(
        phdr.pack(
            # p_type = PT_LOAD
            1,
            # p_flags = PF_X|PF_W|PF_R
            0x7,
            # p_offset
            0,
            # p_vaddr
            i * 4096,
            # p_paddr
            0,
            # p_filesz
            0,
            # p_memsz
            4096,
            # p_align
            0,
        )
    )
---
And run this program:
---
#include <elfutils/libdwelf.h>
#include <fcntl.h>
#include <libelf.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
int main(int argc, const char **argv)
{
int fd;
Elf *elf;
size_t phnum;
if (argc != 2) {
fprintf(stderr, "usage: %s FILE\n", argv[0]);
return EXIT_FAILURE;
}
fd = open(argv[1], O_RDONLY);
if (fd == -1) {
perror("open");
return EXIT_FAILURE;
}
elf_version(EV_CURRENT);
elf = elf_begin(fd, ELF_C_READ, NULL);
if (!elf) {
fprintf(stderr, "elf_begin: %s\n", elf_errmsg(-1));
return EXIT_FAILURE;
}
if (elf_getphdrnum(elf, &phnum)) {
fprintf(stderr, "elf_getphdrnum: %s\n", elf_errmsg(-1));
return EXIT_FAILURE;
}
printf("%zu\n", phnum);
return EXIT_SUCCESS;
}
---
This should output 66000, but it outputs 65535 instead.
Looking at file_read_elf, elf_begin only initializes the cache for
ELF_C_RDWR_MMAP and ELF_C_READ_MMAP_PRIVATE, and only when the endianness
matches the host and the section headers are properly aligned:
if (map_address != NULL && e_ident[EI_DATA] == MY_ELFDATA
&& cmd != ELF_C_READ_MMAP /* We need a copy to be able to write. */
&& (ALLOW_UNALIGNED
|| (((uintptr_t) ((char *) ehdr + e_shoff)
& (__alignof__ (Elf64_Shdr) - 1)) == 0)))