During some benchmarking, it was noted that the current morecore
implementation will sometimes violate brk() semantics when the library
is PRELOAD'd. We always use hugetlbfs_vaddr_granularity() to bump to the
next appropriate mapping location for the heapbase.  However, if any of
the linked-in libraries does a malloc before our constructor runs, the
heap then jumps from where their mallocs ended to our heapbase.  glibc,
normally during trimming, will notice this inconsistency, decide our
morecore implementation is busted, and fall back to normal malloc.

Run- and compile-time tested, on top of my patches for trimming and
vaddr_granularity().
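
For anyone wanting to see the idea in isolation, here is a minimal
standalone sketch (not part of the patch) of the padding trick
align_heap() relies on: with mmap()-backed allocation and top padding
disabled via mallopt(), a plain malloc() walks the program break up
toward an arbitrary boundary. The 256M granularity and the ALIGN_TO
macro below are illustrative placeholders, not the values libhugetlbfs
actually computes.

/*
 * Standalone illustration only; GRANULARITY and ALIGN_TO are
 * placeholders, not what libhugetlbfs uses internally.
 */
#include <malloc.h>
#include <stdio.h>
#include <unistd.h>

#define GRANULARITY	(256UL * 1024 * 1024)	/* example boundary only */
#define ALIGN_TO(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long brk_now = (unsigned long)sbrk(0);
	unsigned long target = ALIGN_TO(brk_now, GRANULARITY);

	mallopt(M_MMAP_MAX, 0);	/* force malloc() to use brk() */
	mallopt(M_TOP_PAD, 0);	/* don't let malloc() overshoot the target */

	/* ask for one byte less than the gap, as align_heap() does,
	 * to avoid overshooting the target */
	if (brk_now < target && !malloc(target - brk_now - 1))
		return 1;

	printf("brk moved from 0x%lx to 0x%lx (target 0x%lx)\n",
	       brk_now, (unsigned long)sbrk(0), target);
	return 0;
}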

Signed-off-by: Nishanth Aravamudan <[EMAIL PROTECTED]>

diff -urpN libhugetlbfs-20060911/morecore.c libhugetlbfs-20060911-dev/morecore.c
--- libhugetlbfs-20060911/morecore.c    2006-09-07 14:26:48.000000000 +0000
+++ libhugetlbfs-20060911-dev/morecore.c        2006-09-19 21:26:48.000000000 +0000
@@ -38,6 +38,87 @@ static void *heapbase;
 static void *heaptop;
 static long mapsize;
 
+/**
+ * align_heap - fixup heap to satisfy brk() semantics
+ * @heapaddr: unaligned heap address
+ *
+ * To satisfy brk() semantics, the heap must be contiguous in memory.
+ * However, if libhugetlbfs is PRELOAD'd and another library does a
+ * malloc before our constructor runs, we would jump to the next VMA
+ * boundary and leave a large hole in the address space (nearly 256M on
+ * ppc, for instance). To work around this, we malloc up to two padding
+ * chunks to move the top of the heap, in brk()'s view, up to that boundary.
+ *
+ * returns:    -1, if unable to align the heap properly
+ *             the aligned heap address, otherwise
+ */
+static long align_heap(unsigned long heapaddr)
+{
+       unsigned long aligned_heapaddr, pad1, pad2;
+       void *first_pad, *second_pad;
+
+       aligned_heapaddr = ALIGN(heapaddr,
+                               hugetlbfs_vaddr_granularity(heapaddr));
+
+       if (heapaddr == aligned_heapaddr) {
+               DEBUG("Heap aligned after 0 fixups\n");
+               return heapaddr;
+       }
+
+       if (heapaddr < aligned_heapaddr) {
+               DEBUG("heapaddr = 0x%lx, aligned_heapaddr = 0x%lx\n",
+                                        heapaddr, aligned_heapaddr);
+               /* Force brk() to be used and disable padding */
+               mallopt(M_MMAP_MAX, 0);
+               mallopt(M_TOP_PAD, 0);
+
+               /* Take away one byte to prevent overallocation */
+               pad1 = aligned_heapaddr - heapaddr - 1;
+               first_pad = malloc(pad1);
+               if (!first_pad) {
+                       WARNING("Failed to malloc %lu bytes\n", pad1);
+                       return -1;
+               }
+
+               heapaddr = (unsigned long)sbrk(0);
+               if (heapaddr == aligned_heapaddr) {
+                       DEBUG("Heap aligned after 1 fixup\n");
+                       return heapaddr;
+               }
+
+               /*
+        * If the top of the heap still hasn't reached the aligned
+        * address, we need a second pad allocation
+                */
+               if (heapaddr < aligned_heapaddr) {
+                       /* Take away one byte to prevent overallocation */
+                       pad2 = aligned_heapaddr - heapaddr - 1;
+
+                       second_pad = malloc(pad2);
+                       if (!second_pad) {
+                               WARNING("Failed to malloc %lu bytes\n",
+                                                                pad2);
+                               free(first_pad);
+                               return -1;
+                       }
+
+                       heapaddr = (unsigned long)sbrk(0);
+                       if (heapaddr == aligned_heapaddr) {
+                               DEBUG("Heap aligned after 2 fixups\n");
+                               return heapaddr;
+                       }
+               }
+       }
+
+       /*
+        * Either:
+        * 1) our heap is now offset above the aligned address, or
+        * 2) we failed to align after two tries
+        * In either case, bail out
+        */
+       return -1;
+}
+
 /*
  * Our plan is to ask for pages 'roughly' at the BASE.  We expect and
  * require the kernel to offer us sequential pages from wherever it
@@ -130,6 +223,7 @@ static void __attribute__((constructor))
 {
        char *env, *ep;
        unsigned long heapaddr;
+       long ret;
 
        env = getenv("HUGETLB_MORECORE");
        if (! env)
@@ -157,9 +251,12 @@ static void __attribute__((constructor))
                }
        } else {
                heapaddr = (unsigned long)sbrk(0);
-               heapaddr = ALIGN(heapaddr, hugetlbfs_vaddr_granularity());
        }
 
+       ret = align_heap(heapaddr);
+       if (ret < 0)
+               return;
+       heapaddr = ret;
        DEBUG("setup_morecore(): heapaddr = 0x%lx\n", heapaddr);
 
        heaptop = heapbase = (void *)heapaddr;

-- 
Nishanth Aravamudan <[EMAIL PROTECTED]>
IBM Linux Technology Center
