Author: Nishanth Aravamudan <[EMAIL PROTECTED]>
Date: Fri Feb 9 19:29:41 2007 -0800
elflink: partial segment remapping
Add support for partial segment remapping, where non-relinked binaries
can try to take advantage of libhugetlbfs' segment remapping code. By
LD_PRELOAD'ing the library and specifying the __executable_start address
in the HUGETLB_FORCE_REMAP variable, the library mimics the behavior it
would have if the binary were relinked with our linker scripts.
A few caveats: this is only useful for binaries with already *very*
large segments, especially on power. We can only really use this partial
remapping algorithm if the segment size is larger than the granularity
at which hugepages can be used. This is because we have to be very
careful not to reduce the available address space or to violate any
contiguity rules. Given power's restriction of one page-size per 256M
area, this requires very large segments. x86 and x86_64 are not so
seriously impacted, as a several-MB large array is sufficient to allow
the remapping to occur.
This feature is mutually exclusive with MINIMAL_COPY, because we are
LD_PRELOAD'd and so no longer know which data may or may not be
uninitialized.
Signed-off-by: Nishanth Aravamudan <[EMAIL PROTECTED]>
---
elflink.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 145 insertions(+), 18 deletions(-)
There are probably many bugs in this code, but I wanted to get it out
there. I would appreciate comments, nits, and requests for rewrites from
everyone :)
A simple app shows the code in action and its benefit:
int array[8*1024*1024];
int main() {
getchar();
return 0;
}
which when run as
LD_PRELOAD=obj64/libhugetlbfs.so HUGETLB_VERBOSE=99 \
HUGETLB_FORCE_REMAP=0x400000 ./test
results in
libhugetlbfs: HUGETLB_FORCE_REMAP=4194304, enabling partial segment remapping for non-relinked binaries
libhugetlbfs: HUGETLB_FORCE_REMAP is set, disabling filesz copy optimization
libhugetlbfs: HUGETLB_SHARE=0, sharing disabled
libhugetlbfs: Hugepage segment 0 (phdr 3): 0x800000-0x2600658 (filesz=0) (prot = 0x3)
libhugetlbfs: Mapped hugeseg at 0x2aaaaac00000. Copying 0 bytes from 0x800000...done
libhugetlbfs: Copying extra 0x1e00658 bytes from 0x800000...done
libhugetlbfs: Prepare succeeded
diff --git a/elflink.c b/elflink.c
index 5a57358..1b5d684 100644
--- a/elflink.c
+++ b/elflink.c
@@ -199,6 +199,7 @@ static int minimal_copy = 1;
static int sharing; /* =0 */
int __debug = 0;
static Elf_Ehdr *ehdr;
+static unsigned long force_remap;
/**
* assemble_path - handy wrapper around snprintf() for building paths
@@ -233,11 +234,84 @@ static void assemble_path(char *dst, const char *fmt, ...)
}
/*
+ * Invoked to parse a non-relinked binary's headers
+ */
+static void parse_normal_phdrs()
+{
+ Elf_Phdr *phdr = (Elf_Phdr *)((char *)ehdr + ehdr->e_phoff);
+ int i;
+ long hugepage_granularity = hugetlbfs_vaddr_granularity();
+
+ for (i = 0; i < ehdr->e_phnum && htlb_num_segs < MAX_HTLB_SEGS; i++) {
+ unsigned long vaddr, filesz, memsz, gap;
+ int prot = 0;
+
+ if (phdr[i].p_type != PT_LOAD)
+ continue;
+
+ /*
+ * Partial segment remapping only makes sense if the
+ * memory size of the segment is larger than the
+ * granularity at which hugepages can be used. This
+ * mostly affects ppc, where the segment must be larger
+ * than 256M. This guarantees that remapping the binary
+ * in this forced way won't violate any contiguity
+ * constraints.
+ */
+ if (phdr[i].p_memsz <= hugepage_granularity)
+ continue;
+
+ vaddr = ALIGN(phdr[i].p_vaddr, hugepage_granularity);
+ gap = vaddr - phdr[i].p_vaddr;
+ memsz = phdr[i].p_memsz & (~(hugepage_granularity - 1));
+ filesz = phdr[i].p_filesz & (~(hugepage_granularity - 1));
+ if (memsz > gap) {
+ memsz -= gap;
+ } else {
+ /*
+ * if aligning the memsz to the granularity and
+ * starting it after the gap would mean it is
+ * less than 0, then we can't relink this
+ * segment
+ */
+ continue;
+ }
+ if (filesz > gap) {
+ filesz -= gap;
+ } else {
+ /*
+ * but it's ok for the filesz to be 0, since
+ * minimal_copy is disabled anyways
+ */
+ filesz = 0;
+ }
+ if (phdr[i].p_flags & PF_R)
+ prot |= PROT_READ;
+ if (phdr[i].p_flags & PF_W)
+ prot |= PROT_WRITE;
+ if (phdr[i].p_flags & PF_X)
+ prot |= PROT_EXEC;
+
+ DEBUG("Hugepage segment %d (phdr %d): %#0lx-%#0lx "
+ "(filesz=%#0lx) " "(prot = %#0x)\n",
+ htlb_num_segs, i, vaddr, vaddr+memsz,
+ filesz, prot);
+
+ htlb_seg_table[htlb_num_segs].vaddr = (void *)vaddr;
+ htlb_seg_table[htlb_num_segs].filesz = filesz;
+ htlb_seg_table[htlb_num_segs].memsz = memsz;
+ htlb_seg_table[htlb_num_segs].prot = prot;
+ htlb_seg_table[htlb_num_segs].phdr = i;
+ htlb_num_segs++;
+ }
+}
+
+/*
* Parse an ELF header and record segment information for any segments
* which contain hugetlb information.
*/
-static void parse_phdrs(Elf_Ehdr *ehdr)
+static void parse_relinked_phdrs()
{
Elf_Phdr *phdr = (Elf_Phdr *)((char *)ehdr + ehdr->e_phoff);
int i;
@@ -283,6 +357,54 @@ static void parse_phdrs(Elf_Ehdr *ehdr)
}
}
+static int parse_phdrs()
+{
+ extern Elf_Ehdr __executable_start __attribute__((weak));
+
+ /* a non-relinked binary */
+ if (! (&__executable_start)) {
+ if (force_remap) {
+ ehdr = (Elf_Ehdr *)force_remap;
+ /*
+ * FIXME: verify the passed in
+ * __executable_start is valid by looking for
+ * the ELF string
+ */
+ parse_normal_phdrs();
+ if (htlb_num_segs == 0) {
+ DEBUG("No segments were appropriate for "
+ "partial remapping\n");
+ return -1;
+ }
+ } else {
+ DEBUG("Couldn't locate __executable_start, "
+ "not attempting to remap segments\n");
+ return -1;
+ }
+ } else {
+ ehdr = &__executable_start;
+ parse_relinked_phdrs();
+ if (htlb_num_segs == 0) {
+ DEBUG("Executable is not linked for hugepage segments");
+ if (force_remap) {
+ DEBUG_CONT(", but partial segment remapping has been enabled\n");
+ ehdr = (Elf_Ehdr *)force_remap;
+ parse_normal_phdrs();
+ if (htlb_num_segs == 0) {
+ DEBUG("No segments were appropriate for "
+ "partial remapping\n");
+ return -1;
+ }
+ } else {
+ DEBUG_CONT("\n");
+ return -1;
+ }
+ }
+ }
+ return 0;
+
+}
+
/**
* find_or_create_share_path - obtain a directory to store the shared
* hugetlbfs files
@@ -833,7 +955,7 @@ static void remap_segments(struct seg_info *seg, int num)
static int check_env(void)
{
- char *env;
+ char *env, *ep;
env = getenv("HUGETLB_ELFMAP");
if (env && (strcasecmp(env, "no") == 0)) {
@@ -844,9 +966,22 @@ static int check_env(void)
env = getenv("LD_PRELOAD");
if (env && strstr(env, "libhugetlbfs")) {
- ERROR("LD_PRELOAD is incompatible with segment remapping\n");
- ERROR("Segment remapping has been DISABLED\n");
- return -1;
+ char *env2;
+ env2 = getenv("HUGETLB_FORCE_REMAP");
+ if (env2) {
+ force_remap = strtoul(env2, &ep, 16);
+ if (*ep != '\0') {
+ ERROR("Can't parse HUGETLB_FORCE_REMAP: %s\n", env);
+ return -1;
+ }
+ DEBUG("HUGETLB_FORCE_REMAP=%lu, enabling partial segment "
+ "remapping for non-relinked binaries\n",
+ force_remap);
+ } else {
+ ERROR("LD_PRELOAD is incompatible with segment remapping\n");
+ ERROR("Segment remapping has been DISABLED\n");
+ return -1;
+ }
}
env = getenv("HUGETLB_MINIMAL_COPY");
@@ -854,6 +989,10 @@ static int check_env(void)
DEBUG("HUGETLB_MINIMAL_COPY=%s, disabling filesz copy "
"optimization\n", env);
minimal_copy = 0;
+ } else if (force_remap) {
+ DEBUG("HUGETLB_FORCE_REMAP is set, disabling filesz copy "
+ "optimization\n");
+ minimal_copy = 0;
}
env = getenv("HUGETLB_SHARE");
@@ -885,25 +1024,13 @@ static int check_env(void)
static void __attribute__ ((constructor)) setup_elflink(void)
{
- extern Elf_Ehdr __executable_start __attribute__((weak));
- ehdr = &__executable_start;
int ret, i;
- if (! ehdr) {
- DEBUG("Couldn't locate __executable_start, "
- "not attempting to remap segments\n");
- return;
- }
-
if (check_env())
return;
- parse_phdrs(ehdr);
-
- if (htlb_num_segs == 0) {
- DEBUG("Executable is not linked for hugepage segments\n");
+ if (parse_phdrs())
return;
- }
/* Do we need to find a share directory */
if (sharing) {
--
Nishanth Aravamudan <[EMAIL PROTECTED]>
IBM Linux Technology Center
-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier.
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
Libhugetlbfs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel