Module Name: src Committed By: kamil Date: Sat Feb 29 04:24:34 UTC 2020
Modified Files: src/libexec/ld.elf_so: headers.c reloc.c rtld.h symbol.c Log Message: Implement DT_GNU_HASH DT_GNU_HASH serves the same purpose as DT_HASH, however it is a distinct and faster apprach implemented and designed in the GNU toolchain in 2006. DT_GNU_HASH is preferred whenever available. Original GNU benchmarks claim 50% faster dynamic linking time. https://www.sourceware.org/ml/binutils/2006-06/msg00418.html Code based on FreeBSD and OpenBSD, both were based on DragonFlyBSD. To generate a diff of this commit: cvs rdiff -u -r1.65 -r1.66 src/libexec/ld.elf_so/headers.c cvs rdiff -u -r1.115 -r1.116 src/libexec/ld.elf_so/reloc.c cvs rdiff -u -r1.137 -r1.138 src/libexec/ld.elf_so/rtld.h cvs rdiff -u -r1.71 -r1.72 src/libexec/ld.elf_so/symbol.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/libexec/ld.elf_so/headers.c diff -u src/libexec/ld.elf_so/headers.c:1.65 src/libexec/ld.elf_so/headers.c:1.66 --- src/libexec/ld.elf_so/headers.c:1.65 Sun Dec 30 11:55:15 2018 +++ src/libexec/ld.elf_so/headers.c Sat Feb 29 04:24:33 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: headers.c,v 1.65 2018/12/30 11:55:15 martin Exp $ */ +/* $NetBSD: headers.c,v 1.66 2020/02/29 04:24:33 kamil Exp $ */ /* * Copyright 1996 John D. Polstra. @@ -40,7 +40,7 @@ #include <sys/cdefs.h> #ifndef lint -__RCSID("$NetBSD: headers.c,v 1.65 2018/12/30 11:55:15 martin Exp $"); +__RCSID("$NetBSD: headers.c,v 1.66 2020/02/29 04:24:33 kamil Exp $"); #endif /* not lint */ #include <err.h> @@ -166,26 +166,87 @@ _rtld_digest_dynamic(const char *execnam case DT_HASH: { + uint32_t nbuckets, nchains; const Elf_Symindx *hashtab = (const Elf_Symindx *) (obj->relocbase + dynp->d_un.d_ptr); if (hashtab[0] > UINT32_MAX) - obj->nbuckets = UINT32_MAX; + nbuckets = UINT32_MAX; else - obj->nbuckets = hashtab[0]; - obj->nchains = hashtab[1]; + nbuckets = hashtab[0]; + obj->nbuckets = nbuckets; + obj->nchains = (nchains = hashtab[1]); obj->buckets = hashtab + 2; obj->chains = obj->buckets + obj->nbuckets; + + /* Validity check */ + if (!obj->buckets || !nbuckets || !nchains) + continue; + + obj->sysv_hash = true; + + /* + * Should really be in _rtld_relocate_objects, + * but _rtld_symlook_obj might be used before. + */ + fast_divide32_prepare(obj->nbuckets, + &obj->nbuckets_m, + &obj->nbuckets_s1, + &obj->nbuckets_s2); + } + break; + + case DT_GNU_HASH: + { + uint32_t nmaskwords; + uint32_t nbuckets, symndx; + int bloom_size32; + bool nmw_power2; + const Elf_Symindx *hashtab = (const Elf_Symindx *) + (obj->relocbase + dynp->d_un.d_ptr); + + if (hashtab[0] > UINT32_MAX) + nbuckets = UINT32_MAX; + else + nbuckets = hashtab[0]; + obj->nbuckets_gnu = nbuckets; + + nmaskwords = hashtab[2]; + bloom_size32 = nmaskwords * (ELFSIZE / 32); + + obj->buckets_gnu = hashtab + 4 + bloom_size32; + + nmw_power2 = powerof2(nmaskwords); + + /* Validity check */ + if (!nmw_power2 || !nbuckets || !obj->buckets_gnu) + continue; + + obj->gnu_hash = true; + + obj->mask_bm_gnu = nmaskwords - 1; + obj->symndx_gnu = (symndx = hashtab[1]); + obj->shift2_gnu = hashtab[3]; + obj->bloom_gnu = (const Elf_Addr *)(hashtab + 4); + obj->chains_gnu = obj->buckets_gnu + nbuckets - symndx; + /* * Should really be in _rtld_relocate_objects, * but _rtld_symlook_obj might be used before. */ - if (obj->nbuckets) { - fast_divide32_prepare(obj->nbuckets, - &obj->nbuckets_m, - &obj->nbuckets_s1, - &obj->nbuckets_s2); - } + fast_divide32_prepare(nbuckets, + &obj->nbuckets_m_gnu, + &obj->nbuckets_s1_gnu, + &obj->nbuckets_s2_gnu); + + dbg(("found GNU Hash: buckets=%p " + "nbuckets=%lu chains=%p nchains=%u " + "bloom=%p mask_bm=%u shift2=%u " + "symndx=%u", + obj->buckets_gnu, obj->nbuckets_gnu, + obj->chains_gnu, obj->nchains_gnu, + obj->bloom_gnu, obj->mask_bm_gnu, + obj->shift2_gnu, obj->symndx_gnu)); } break; @@ -352,6 +413,26 @@ _rtld_digest_dynamic(const char *execnam obj->relalim = obj->pltrela; } + /* If the ELF Hash is present, "nchains" is the same in both hashes. */ + if (!obj->sysv_hash && obj->gnu_hash) { + uint_fast32_t i, nbucket, symndx; + + /* Otherwise, count the entries from the GNU Hash chain. */ + nbucket = obj->nbuckets_gnu; + symndx = obj->symndx_gnu; + + for (i = 0; i < nbucket; i++) { + Elf_Word bkt = obj->buckets_gnu[i]; + if (bkt == 0) + continue; + const uint32_t *hashval = &obj->chains_gnu[bkt]; + do { + symndx++; + } while ((*hashval++ & 1U) == 0); + } + obj->nchains_gnu = (uint32_t)symndx; + } + #ifdef RTLD_LOADER if (init != 0) obj->init = (Elf_Addr) obj->relocbase + init; Index: src/libexec/ld.elf_so/reloc.c diff -u src/libexec/ld.elf_so/reloc.c:1.115 src/libexec/ld.elf_so/reloc.c:1.116 --- src/libexec/ld.elf_so/reloc.c:1.115 Sat Feb 29 04:23:05 2020 +++ src/libexec/ld.elf_so/reloc.c Sat Feb 29 04:24:33 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: reloc.c,v 1.115 2020/02/29 04:23:05 kamil Exp $ */ +/* $NetBSD: reloc.c,v 1.116 2020/02/29 04:24:33 kamil Exp $ */ /* * Copyright 1996 John D. Polstra. @@ -39,7 +39,7 @@ #include <sys/cdefs.h> #ifndef lint -__RCSID("$NetBSD: reloc.c,v 1.115 2020/02/29 04:23:05 kamil Exp $"); +__RCSID("$NetBSD: reloc.c,v 1.116 2020/02/29 04:24:33 kamil Exp $"); #endif /* not lint */ #include <err.h> @@ -174,9 +174,8 @@ _rtld_relocate_objects(Obj_Entry *first, int ok = 1; for (obj = first; obj != NULL; obj = obj->next) { - if (obj->nbuckets == 0 || obj->nchains == 0 || - obj->buckets == NULL || obj->symtab == NULL || - obj->strtab == NULL) { + if ((!obj->sysv_hash && !obj->gnu_hash) || + obj->symtab == NULL || obj->strtab == NULL) { _rtld_error("%s: Shared object has no run-time" " symbol table", obj->path); return -1; Index: src/libexec/ld.elf_so/rtld.h diff -u src/libexec/ld.elf_so/rtld.h:1.137 src/libexec/ld.elf_so/rtld.h:1.138 --- src/libexec/ld.elf_so/rtld.h:1.137 Sat Feb 29 04:23:05 2020 +++ src/libexec/ld.elf_so/rtld.h Sat Feb 29 04:24:33 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: rtld.h,v 1.137 2020/02/29 04:23:05 kamil Exp $ */ +/* $NetBSD: rtld.h,v 1.138 2020/02/29 04:24:33 kamil Exp $ */ /* * Copyright 1996 John D. Polstra. @@ -181,6 +181,7 @@ typedef struct Struct_Obj_Entry { Elf_Word gotsym; /* First dynamic symbol in GOT */ #endif + /* SysV Hash fields */ const Elf_Symindx *buckets; /* Hash table buckets array */ unsigned long unused1; /* Used to be nbuckets */ const Elf_Symindx *chains; /* Hash table chain array */ @@ -221,7 +222,9 @@ typedef struct Struct_Obj_Entry { tls_done:1, /* True if static TLS offset * has been allocated */ #endif - ref_nodel:1; /* Refcount increased to prevent dlclose */ + ref_nodel:1, /* Refcount increased to prevent dlclose */ + sysv_hash:1, /* SysV Hash available */ + gnu_hash:1; /* GNU Hash available */ struct link_map linkmap; /* for GDB */ @@ -234,10 +237,25 @@ typedef struct Struct_Obj_Entry { void *ehdr; + /* SysV Hash fields */ uint32_t nbuckets; /* Number of buckets */ uint32_t nbuckets_m; /* Precomputed for fast remainder */ uint8_t nbuckets_s1; uint8_t nbuckets_s2; + + /* GNU Hash fields */ + const uint32_t *buckets_gnu; /* Hash table buckets array */ + uint32_t nbuckets_gnu; /* Number of GNU hash buckets */ + uint32_t nbuckets_m_gnu; /* Precomputed for fast remainder */ + uint8_t nbuckets_s1_gnu; + uint8_t nbuckets_s2_gnu; + const uint32_t *chains_gnu; /* Hash table chain array */ +#define nchains_gnu nchains /* Number of symbols, shared with SysV Hash */ + const Elf_Addr *bloom_gnu; + uint32_t symndx_gnu; /* First accessible symbol on dynsym table */ + uint32_t mask_bm_gnu; /* Bloom filter words - 1 (bitmask) */ + uint32_t shift2_gnu; /* Bloom filter shift count */ + size_t pathlen; /* Pathname length */ SIMPLEQ_HEAD(, Struct_Name_Entry) names; /* List of names for this * object we know about. */ Index: src/libexec/ld.elf_so/symbol.c diff -u src/libexec/ld.elf_so/symbol.c:1.71 src/libexec/ld.elf_so/symbol.c:1.72 --- src/libexec/ld.elf_so/symbol.c:1.71 Sat Feb 29 04:23:05 2020 +++ src/libexec/ld.elf_so/symbol.c Sat Feb 29 04:24:33 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: symbol.c,v 1.71 2020/02/29 04:23:05 kamil Exp $ */ +/* $NetBSD: symbol.c,v 1.72 2020/02/29 04:24:33 kamil Exp $ */ /* * Copyright 1996 John D. Polstra. @@ -40,7 +40,7 @@ #include <sys/cdefs.h> #ifndef lint -__RCSID("$NetBSD: symbol.c,v 1.71 2020/02/29 04:23:05 kamil Exp $"); +__RCSID("$NetBSD: symbol.c,v 1.72 2020/02/29 04:24:33 kamil Exp $"); #endif /* not lint */ #include <err.h> @@ -327,18 +327,17 @@ _rtld_symlook_obj_matched_symbol(const c * the given name. Returns a pointer to the symbol, or NULL if no * definition was found. * - * The symbol's hash value is passed in for efficiency reasons; that - * eliminates many recomputations of the hash value. + * SysV Hash version. */ -const Elf_Sym * -_rtld_symlook_obj(const char *name, Elf_Hash *hash, +static const Elf_Sym * +_rtld_symlook_obj_sysv(const char *name, unsigned long hash, const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry) { unsigned long symnum; const Elf_Sym *vsymp = NULL; int vcount = 0; - for (symnum = obj->buckets[fast_remainder32(hash->sysv, obj->nbuckets, + for (symnum = obj->buckets[fast_remainder32(hash, obj->nbuckets, obj->nbuckets_m, obj->nbuckets_s1, obj->nbuckets_s2)]; symnum != ELF_SYM_UNDEFINED; symnum = obj->chains[symnum]) { @@ -355,6 +354,82 @@ _rtld_symlook_obj(const char *name, Elf_ } /* + * Search the symbol table of a single shared object for a symbol of + * the given name. Returns a pointer to the symbol, or NULL if no + * definition was found. + * + * GNU Hash version. + */ +static const Elf_Sym * +_rtld_symlook_obj_gnu(const char *name, unsigned long hash, + const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry) +{ + unsigned long symnum; + const Elf_Sym *vsymp = NULL; + const Elf32_Word *hashval; + Elf_Addr bloom_word; + Elf32_Word bucket; + int vcount = 0; + unsigned int h1, h2; + + /* Pick right bitmask word from Bloom filter array */ + bloom_word = obj->bloom_gnu[(hash / ELFSIZE) & obj->mask_bm_gnu]; + + /* Calculate modulus word size of gnu hash and its derivative */ + h1 = hash & (ELFSIZE - 1); + h2 = ((hash >> obj->shift2_gnu) & (ELFSIZE - 1)); + + /* Filter out the "definitely not in set" queries */ + if (((bloom_word >> h1) & (bloom_word >> h2) & 1) == 0) + return NULL; + + /* Locate hash chain and corresponding value element*/ + bucket = obj->buckets_gnu[fast_remainder32(hash, obj->nbuckets_gnu, + obj->nbuckets_m_gnu, obj->nbuckets_s1_gnu, obj->nbuckets_s2_gnu)]; + if (bucket == 0) + return NULL; + + hashval = &obj->chains_gnu[bucket]; + do { + if (((*hashval ^ hash) >> 1) == 0) { + symnum = hashval - obj->chains_gnu; + + if (_rtld_symlook_obj_matched_symbol(name, obj, flags, + ventry, symnum, &vsymp, &vcount)) { + return vsymp; + } + } + } while ((*hashval++ & 1) == 0); + if (vcount == 1) + return vsymp; + return NULL; +} + +/* + * Search the symbol table of a single shared object for a symbol of + * the given name. Returns a pointer to the symbol, or NULL if no + * definition was found. + * + * The symbol's hash value is passed in for efficiency reasons; that + * eliminates many recomputations of the hash value. + * + * Redirect to either GNU Hash (whenever available) or ELF Hash. + */ +const Elf_Sym * +_rtld_symlook_obj(const char *name, Elf_Hash *hash, + const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry) +{ + + assert(obj->sysv_hash || obj->gnu_hash); + + /* Always prefer the GNU Hash as it is faster. */ + if (obj->gnu_hash) + return _rtld_symlook_obj_gnu(name, hash->gnu, obj, flags, ventry); + else + return _rtld_symlook_obj_sysv(name, hash->sysv, obj, flags, ventry); +} + +/* * Given a symbol number in a referencing object, find the corresponding * definition of the symbol. Returns a pointer to the symbol, or NULL if * no definition was found. Returns a pointer to the Obj_Entry of the