Add support for partial segment remapping, where non-relinked binaries
can try to take advantage of libhugetlbfs' segment remapping code. By
LD_PRELOAD'ing the library and specifying the __executable_start address
(in hexadecimal) in the HUGETLB_FORCE_ELFMAP variable, the library mimics
the behavior it would have if the binary were relinked with our linker
scripts.
A few caveats: this is only useful for binaries with already *very*
large segments, especially on power. We can only really use this partial
remapping algorithm if the segment size is larger than the granularity
at which hugepages can be used. This is because we have to be very
careful not to reduce the available address space or to violate any
contiguity rules. Given power's restriction of one page-size per 256M
area, this requires very large segments. x86 and x86_64 are not so
seriously impacted, as a several-MB large array is sufficient to allow
the remapping to occur.
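This feature is mutually exclusive with MINIMAL_COPY: because the library
is LD_PRELOAD'd into a binary that was not relinked, it can no longer
tell which parts of a segment are uninitialized, so the filesz copy
optimization is disabled whenever partial remapping is enabled.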
Signed-off-by: Nishanth Aravamudan <[EMAIL PROTECTED]>
---
elflink.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
1 files changed, 147 insertions(+), 19 deletions(-)
diff --git a/elflink.c b/elflink.c
index a53c649..adaf71c 100644
--- a/elflink.c
+++ b/elflink.c
@@ -197,6 +197,7 @@ static struct seg_info htlb_seg_table[MAX_HTLB_SEGS];
static int htlb_num_segs;
static int minimal_copy = 1;
static int sharing; /* =0 */
+static unsigned long force_remap;
int __debug = 0;
/**
@@ -518,11 +519,7 @@ bail2:
seg->extrasz = end_orig - start_orig;
}
-/*
- * Parse an ELF header and record segment information for any segments
- * which contain hugetlb information.
- */
-static void parse_elf(Elf_Ehdr *ehdr)
+static void parse_elf_relinked(Elf_Ehdr *ehdr)
{
Elf_Phdr *phdr = (Elf_Phdr *)((char *)ehdr + ehdr->e_phoff);
int i;
@@ -570,6 +567,128 @@ static void parse_elf(Elf_Ehdr *ehdr)
}
}
+/*
+ * Record, for a binary that was not relinked, the part of each large
+ * PT_LOAD segment that can be remapped: the granularity-aligned range
+ * lying wholly inside the segment is appended to htlb_seg_table[].
+ */
+static void parse_elf_normal(Elf_Ehdr *ehdr)
+{
+	Elf_Phdr *phdr = (Elf_Phdr *)((char *)ehdr + ehdr->e_phoff);
+	int i;
+	long hugepage_granularity = hugetlbfs_vaddr_granularity();
+
+	for (i = 0; i < ehdr->e_phnum && htlb_num_segs < MAX_HTLB_SEGS; i++) {
+		unsigned long vaddr, end, filesz, memsz;
+		int prot = 0;
+
+		if (phdr[i].p_type != PT_LOAD)
+			continue;
+
+		/*
+		 * Partial segment remapping only makes sense if the
+		 * memory size of the segment is larger than the
+		 * granularity at which hugepages can be used. This
+		 * mostly affects ppc, where the segment must be larger
+		 * than 256M.
+		 */
+		if (phdr[i].p_memsz <= hugepage_granularity) {
+			DEBUG("Segment %d too small: %#0lx <= %#0lx\n",
+				i, (unsigned long)phdr[i].p_memsz,
+				(unsigned long)hugepage_granularity);
+			continue;
+		}
+
+		/*
+		 * Round the start up and the end down so that both
+		 * edges of the remapped range sit on a granularity
+		 * boundary; the unaligned head and tail of the segment
+		 * are left alone. Rounding p_memsz alone would leave
+		 * the end unaligned whenever p_vaddr is unaligned.
+		 */
+		vaddr = ALIGN(phdr[i].p_vaddr, hugepage_granularity);
+		end = (phdr[i].p_vaddr + phdr[i].p_memsz) &
+			~((unsigned long)hugepage_granularity - 1);
+
+		if (end <= vaddr) {
+			/* no whole granule fits inside this segment */
+			DEBUG("Aligning segment %d's memsz makes it too "
+				"small\n", i);
+			continue;
+		}
+		memsz = end - vaddr;
+		/* minimal_copy is off: filesz = memsz avoids issues in prepare */
+		filesz = memsz;
+
+		if (phdr[i].p_flags & PF_R)
+			prot |= PROT_READ;
+		if (phdr[i].p_flags & PF_W)
+			prot |= PROT_WRITE;
+		if (phdr[i].p_flags & PF_X)
+			prot |= PROT_EXEC;
+
+		DEBUG("Hugepage segment %d (phdr %d): %#0lx-%#0lx "
+			"(filesz=%#0lx) " "(prot = %#0x)\n",
+			htlb_num_segs, i, vaddr, end, filesz, prot);
+
+		htlb_seg_table[htlb_num_segs].vaddr = (void *)vaddr;
+		htlb_seg_table[htlb_num_segs].filesz = filesz;
+		htlb_seg_table[htlb_num_segs].memsz = memsz;
+		htlb_seg_table[htlb_num_segs].prot = prot;
+		htlb_seg_table[htlb_num_segs].num = i;
+		htlb_num_segs++;
+	}
+}
+
+/*
+ * Pick the ELF header to parse and record its remappable segments in
+ * htlb_seg_table[].
+ *
+ * A binary that defines __executable_start is first parsed with
+ * parse_elf_relinked(). If that records nothing, or the symbol is
+ * missing, and force_remap is set, the header at address force_remap
+ * is parsed with parse_elf_normal() instead.
+ *
+ * Returns 0 if at least one segment was recorded, -1 otherwise.
+ */
+static int parse_elf(void)
+{
+	extern Elf_Ehdr __executable_start __attribute__((weak));
+
+	/* the address of a weak symbol is NULL when it is undefined */
+	if (&__executable_start) {
+		parse_elf_relinked(&__executable_start);
+		if (htlb_num_segs != 0)
+			return 0;
+
+		if (!force_remap) {
+			DEBUG("Executable is not linked for hugepage segments\n");
+			return -1;
+		}
+		DEBUG("Executable is not linked for hugepage segments, "
+			"but partial segment remapping enabled\n");
+	} else if (!force_remap) {
+		/* a normal, not relinked binary, and no override given */
+		DEBUG("Couldn't locate __executable_start, "
+			"not attempting to remap segments\n");
+		return -1;
+	}
+
+	/*
+	 * Partial remapping of the image named by the user.
+	 * FIXME: verify the passed in address is a valid ELF exec header
+	 * by looking for the ELF string
+	 */
+	parse_elf_normal((Elf_Ehdr *)force_remap);
+	if (htlb_num_segs == 0) {
+		DEBUG("No segments were appropriate for "
+			"partial remapping\n");
+		return -1;
+	}
+
+	return 0;
+}
+
/*
* Copy a program segment into a huge page. If possible, try to copy the
* smallest amount of data possible, unless the user disables this
@@ -894,13 +1013,29 @@ static int check_env(void)
env = getenv("LD_PRELOAD");
if (env && strstr(env, "libhugetlbfs")) {
- ERROR("LD_PRELOAD is incompatible with segment remapping\n");
- ERROR("Segment remapping has been DISABLED\n");
- return -1;
+ char *env2, *ep;
+ env2 = getenv("HUGETLB_FORCE_ELFMAP");
+ if (env2) {
+ force_remap = strtoul(env2, &ep, 16);
+ if (*ep != '\0') {
+ ERROR("Can't parse HUGETLB_FORCE_ELFMAP: "
+ "%s\n", strerror(errno));
+ return -1;
+ }
+ DEBUG("HUGETLB_FORCE_ELFMAP=%#0lx, enabling partial
segment "
+ "remapping for non-relinked binaries\n",
+ force_remap);
+ DEBUG("Disabling filesz copy optimization\n");
+ minimal_copy = 0;
+ } else {
+ ERROR("LD_PRELOAD is incompatible with segment
remapping\n");
+ ERROR("Segment remapping has been DISABLED\n");
+ return -1;
+ }
}
env = getenv("HUGETLB_MINIMAL_COPY");
- if (env && (strcasecmp(env, "no") == 0)) {
+ if (minimal_copy && env && (strcasecmp(env, "no") == 0)) {
DEBUG("HUGETLB_MINIMAL_COPY=%s, disabling filesz copy "
"optimization\n", env);
minimal_copy = 0;
@@ -935,20 +1070,13 @@ static int check_env(void)
static void __attribute__ ((constructor)) setup_elflink(void)
{
- extern Elf_Ehdr __executable_start __attribute__((weak));
- Elf_Ehdr *ehdr = &__executable_start;
- int ret, i;
-
- if (! ehdr) {
- DEBUG("Couldn't locate __executable_start, "
- "not attempting to remap segments\n");
- return;
- }
+ int i, ret;
if (check_env())
return;
- parse_elf(ehdr);
+ if (parse_elf())
+ return;
if (htlb_num_segs == 0) {
DEBUG("Executable is not linked for hugepage segments\n");
--
Nishanth Aravamudan <[EMAIL PROTECTED]>
IBM Linux Technology Center
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Libhugetlbfs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel