During some benchmarking, it was noted that the current morecore implementation will sometimes violate brk() semantics when the library is PRELOAD'd. We always use hugetlbfs_vaddr_granularity() to bump to the next appropriate mapping location for the heapbase. However, if any of the linked-in libraries do a malloc before our constructor runs, we will then jump in the heap from where their mallocs ended to our heapbase. glibc, normally during trimming, will note this inconsistency and decide our morecore implementation is busted and fall back to the normal malloc.
Run- and compile-time tested, on top of my patches for trimming and vaddr_granularity(). Signed-off-by: Nishanth Aravamudan <[EMAIL PROTECTED]> diff -urpN libhugetlbfs-20060911/morecore.c libhugetlbfs-20060911-dev/morecore.c --- libhugetlbfs-20060911/morecore.c 2006-09-07 14:26:48.000000000 +0000 +++ libhugetlbfs-20060911-dev/morecore.c 2006-09-19 21:26:48.000000000 +0000 @@ -38,6 +38,87 @@ static void *heapbase; static void *heaptop; static long mapsize; +/** + * align_heap - fixup heap to satisfy brk() semantics + * @heapaddr: unaligned heap address + * + * To satisfy brk() semantics, the heap must be continuous in memory. + * However, if libhugetlbfs is PRELOAD'd and another library does a + * malloc before we run our constructor, we will jump to the next VMA + * boundary and leave a large hole in the address space (nearly 256M on + * ppc, for instance). To work around this, we malloc up to two chunks + * to move the top of the heap, in brk()'s view. + * + * returns: -1, if unable to align the heap properly + * the aligned heap address, otherwise + */ +static int align_heap(unsigned long heapaddr) +{ + unsigned long aligned_heapaddr, pad1, pad2; + void *first_pad, *second_pad; + + aligned_heapaddr = ALIGN(heapaddr, + hugetlbfs_vaddr_granularity(heapaddr)); + + if (heapaddr == aligned_heapaddr) { + DEBUG("aligned after 0 fixes\n"); + return heapaddr; + } + + if (heapaddr < aligned_heapaddr) { + DEBUG("heapaddr = 0x%lx, aligned_heapaddr = 0x%lx\n", + heapaddr, aligned_heapaddr); + /* Force brk() to be used and disable padding */ + mallopt(M_MMAP_MAX, 0); + mallopt(M_TOP_PAD, 0); + + /* Take away one byte to prevent overallocation */ + pad1 = aligned_heapaddr - heapaddr - 1; + first_pad = malloc(pad1); + if (!first_pad) { + WARNING("Failed to malloc %lu bytes\n", pad1); + return -1; + } + + heapaddr = (unsigned long)sbrk(0); + if (heapaddr == aligned_heapaddr) { + DEBUG("Heap aligned after 1 fixup\n"); + return heapaddr; + } + + /* + * If the top of the heap hadn't 
yet reached sbrk(0), + * then we need to allocate again + */ + if (heapaddr < aligned_heapaddr) { + /* Take away one byte to prevent overallocation */ + pad2 = aligned_heapaddr - heapaddr - 1; + + second_pad = malloc(pad2); + if (!second_pad) { + WARNING("Failed to malloc %lu bytes\n", + pad2); + free(first_pad); + return -1; + } + + heapaddr = (unsigned long)sbrk(0); + if (heapaddr == aligned_heapaddr) { + DEBUG("Heap aligned after 2 fixups\n"); + return heapaddr; + } + } + } + + /* + * Either: + * 1) our heap is now offset above the aligned address, or + * 2) we failed to align after two tries + * In either case, bail out + */ + return -1; +} + /* * Our plan is to ask for pages 'roughly' at the BASE. We expect and * require the kernel to offer us sequential pages from wherever it @@ -130,6 +223,7 @@ static void __attribute__((constructor)) { char *env, *ep; unsigned long heapaddr; + long ret; env = getenv("HUGETLB_MORECORE"); if (! env) @@ -157,9 +251,12 @@ static void __attribute__((constructor)) } } else { heapaddr = (unsigned long)sbrk(0); - heapaddr = ALIGN(heapaddr, hugetlbfs_vaddr_granularity()); } + ret = align_heap(heapaddr); + if (ret < 0) + return; + heapaddr = ret; DEBUG("setup_morecore(): heapaddr = 0x%lx\n", heapaddr); heaptop = heapbase = (void *)heapaddr; -- Nishanth Aravamudan <[EMAIL PROTECTED]> IBM Linux Technology Center ------------------------------------------------------------------------- Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys -- and earn cash http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV _______________________________________________ Libhugetlbfs-devel mailing list Libhugetlbfs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/libhugetlbfs-devel