Module Name:    src
Committed By:   kamil
Date:           Sat Feb 29 04:24:34 UTC 2020

Modified Files:
        src/libexec/ld.elf_so: headers.c reloc.c rtld.h symbol.c

Log Message:
Implement DT_GNU_HASH

DT_GNU_HASH serves the same purpose as DT_HASH, however it is a distinct
and faster apprach implemented and designed in the GNU toolchain in 2006.

DT_GNU_HASH is preferred whenever available.

Original GNU benchmarks claim 50% faster dynamic linking time.
https://www.sourceware.org/ml/binutils/2006-06/msg00418.html

Code based on FreeBSD and OpenBSD, both were based on DragonFlyBSD.


To generate a diff of this commit:
cvs rdiff -u -r1.65 -r1.66 src/libexec/ld.elf_so/headers.c
cvs rdiff -u -r1.115 -r1.116 src/libexec/ld.elf_so/reloc.c
cvs rdiff -u -r1.137 -r1.138 src/libexec/ld.elf_so/rtld.h
cvs rdiff -u -r1.71 -r1.72 src/libexec/ld.elf_so/symbol.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/libexec/ld.elf_so/headers.c
diff -u src/libexec/ld.elf_so/headers.c:1.65 src/libexec/ld.elf_so/headers.c:1.66
--- src/libexec/ld.elf_so/headers.c:1.65	Sun Dec 30 11:55:15 2018
+++ src/libexec/ld.elf_so/headers.c	Sat Feb 29 04:24:33 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: headers.c,v 1.65 2018/12/30 11:55:15 martin Exp $	 */
+/*	$NetBSD: headers.c,v 1.66 2020/02/29 04:24:33 kamil Exp $	 */
 
 /*
  * Copyright 1996 John D. Polstra.
@@ -40,7 +40,7 @@
 
 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: headers.c,v 1.65 2018/12/30 11:55:15 martin Exp $");
+__RCSID("$NetBSD: headers.c,v 1.66 2020/02/29 04:24:33 kamil Exp $");
 #endif /* not lint */
 
 #include <err.h>
@@ -166,26 +166,87 @@ _rtld_digest_dynamic(const char *execnam
 
 		case DT_HASH:
 			{
+				uint32_t nbuckets, nchains;
 				const Elf_Symindx *hashtab = (const Elf_Symindx *)
 				    (obj->relocbase + dynp->d_un.d_ptr);
 
 				if (hashtab[0] > UINT32_MAX)
-					obj->nbuckets = UINT32_MAX;
+					nbuckets = UINT32_MAX;
 				else
-					obj->nbuckets = hashtab[0];
-				obj->nchains = hashtab[1];
+					nbuckets = hashtab[0];
+				obj->nbuckets = nbuckets;
+				obj->nchains = (nchains = hashtab[1]);
 				obj->buckets = hashtab + 2;
 				obj->chains = obj->buckets + obj->nbuckets;
+
+				/* Validity check */
+				if (!obj->buckets || !nbuckets || !nchains)
+					continue;
+
+				obj->sysv_hash = true;
+
+				/*
+				 * Should really be in _rtld_relocate_objects,
+				 * but _rtld_symlook_obj might be used before.
+				 */
+				fast_divide32_prepare(obj->nbuckets,
+				    &obj->nbuckets_m,
+				    &obj->nbuckets_s1,
+				    &obj->nbuckets_s2);
+			}
+			break;
+
+		case DT_GNU_HASH:
+			{
+				uint32_t nmaskwords;
+				uint32_t nbuckets, symndx;
+				int bloom_size32;
+				bool nmw_power2;
+				const Elf_Symindx *hashtab = (const Elf_Symindx *)
+				    (obj->relocbase + dynp->d_un.d_ptr);
+
+				if (hashtab[0] > UINT32_MAX)
+					nbuckets = UINT32_MAX;
+				else
+					nbuckets = hashtab[0];
+				obj->nbuckets_gnu = nbuckets;
+
+				nmaskwords = hashtab[2];
+				bloom_size32 = nmaskwords * (ELFSIZE / 32);
+
+				obj->buckets_gnu = hashtab + 4 + bloom_size32;
+
+				nmw_power2 = powerof2(nmaskwords);
+
+				/* Validity check */
+				if (!nmw_power2 || !nbuckets || !obj->buckets_gnu)
+					continue;
+
+				obj->gnu_hash = true;
+
+				obj->mask_bm_gnu = nmaskwords - 1;
+				obj->symndx_gnu = (symndx = hashtab[1]);
+				obj->shift2_gnu = hashtab[3];
+				obj->bloom_gnu = (const Elf_Addr *)(hashtab + 4);
+				obj->chains_gnu = obj->buckets_gnu + nbuckets - symndx;
+
 				/*
 				 * Should really be in _rtld_relocate_objects,
 				 * but _rtld_symlook_obj might be used before.
 				 */
-				if (obj->nbuckets) {
-					fast_divide32_prepare(obj->nbuckets,
-					    &obj->nbuckets_m,
-					    &obj->nbuckets_s1,
-					    &obj->nbuckets_s2);
-				}
+				fast_divide32_prepare(nbuckets,
+				    &obj->nbuckets_m_gnu,
+				    &obj->nbuckets_s1_gnu,
+				    &obj->nbuckets_s2_gnu);
+
+				dbg(("found GNU Hash: buckets=%p "
+				     "nbuckets=%lu chains=%p nchains=%u "
+				     "bloom=%p mask_bm=%u shift2=%u "
+				     "symndx=%u",
+				    obj->buckets_gnu, obj->nbuckets_gnu,
+				    obj->chains_gnu, obj->nchains_gnu,
+				    obj->bloom_gnu, obj->mask_bm_gnu,
+				    obj->shift2_gnu, obj->symndx_gnu));
 			}
 			break;
 
@@ -352,6 +413,26 @@ _rtld_digest_dynamic(const char *execnam
 			obj->relalim = obj->pltrela;
 	}
 
+	/* If the ELF Hash is present, "nchains" is the same in both hashes. */
+	if (!obj->sysv_hash && obj->gnu_hash) {
+		uint_fast32_t i, nbucket, symndx;
+
+		/* Otherwise, count the entries from the GNU Hash chain. */
+		nbucket = obj->nbuckets_gnu;
+		symndx = obj->symndx_gnu;
+
+		for (i = 0; i < nbucket; i++) {
+			Elf_Word bkt = obj->buckets_gnu[i];
+			if (bkt == 0)
+				continue;
+			const uint32_t *hashval = &obj->chains_gnu[bkt];
+			do {
+				symndx++;
+			} while ((*hashval++ & 1U) == 0);
+		}
+		obj->nchains_gnu = (uint32_t)symndx;
+	}
+
 #ifdef RTLD_LOADER
 	if (init != 0)
 		obj->init = (Elf_Addr) obj->relocbase + init;

Index: src/libexec/ld.elf_so/reloc.c
diff -u src/libexec/ld.elf_so/reloc.c:1.115 src/libexec/ld.elf_so/reloc.c:1.116
--- src/libexec/ld.elf_so/reloc.c:1.115	Sat Feb 29 04:23:05 2020
+++ src/libexec/ld.elf_so/reloc.c	Sat Feb 29 04:24:33 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: reloc.c,v 1.115 2020/02/29 04:23:05 kamil Exp $	 */
+/*	$NetBSD: reloc.c,v 1.116 2020/02/29 04:24:33 kamil Exp $	 */
 
 /*
  * Copyright 1996 John D. Polstra.
@@ -39,7 +39,7 @@
 
 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: reloc.c,v 1.115 2020/02/29 04:23:05 kamil Exp $");
+__RCSID("$NetBSD: reloc.c,v 1.116 2020/02/29 04:24:33 kamil Exp $");
 #endif /* not lint */
 
 #include <err.h>
@@ -174,9 +174,8 @@ _rtld_relocate_objects(Obj_Entry *first,
 	int ok = 1;
 
 	for (obj = first; obj != NULL; obj = obj->next) {
-		if (obj->nbuckets == 0 || obj->nchains == 0 ||
-		    obj->buckets == NULL || obj->symtab == NULL ||
-		    obj->strtab == NULL) {
+		if ((!obj->sysv_hash && !obj->gnu_hash) ||
+		    obj->symtab == NULL || obj->strtab == NULL) {
 			_rtld_error("%s: Shared object has no run-time"
 			    " symbol table", obj->path);
 			return -1;

Index: src/libexec/ld.elf_so/rtld.h
diff -u src/libexec/ld.elf_so/rtld.h:1.137 src/libexec/ld.elf_so/rtld.h:1.138
--- src/libexec/ld.elf_so/rtld.h:1.137	Sat Feb 29 04:23:05 2020
+++ src/libexec/ld.elf_so/rtld.h	Sat Feb 29 04:24:33 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: rtld.h,v 1.137 2020/02/29 04:23:05 kamil Exp $	 */
+/*	$NetBSD: rtld.h,v 1.138 2020/02/29 04:24:33 kamil Exp $	 */
 
 /*
  * Copyright 1996 John D. Polstra.
@@ -181,6 +181,7 @@ typedef struct Struct_Obj_Entry {
 	Elf_Word        gotsym;		/* First dynamic symbol in GOT */
 #endif
 
+	/* SysV Hash fields */
 	const Elf_Symindx *buckets;	/* Hash table buckets array */
 	unsigned long	unused1;	/* Used to be nbuckets */
 	const Elf_Symindx *chains;	/* Hash table chain array */
@@ -221,7 +222,9 @@ typedef struct Struct_Obj_Entry {
 			tls_done:1,	/* True if static TLS offset
 					 * has been allocated */
 #endif
-			ref_nodel:1;	/* Refcount increased to prevent dlclose */
+			ref_nodel:1,	/* Refcount increased to prevent dlclose */
+			sysv_hash:1,	/* SysV Hash available */
+			gnu_hash:1;	/* GNU Hash available */
 
 	struct link_map linkmap;	/* for GDB */
 
@@ -234,10 +237,25 @@ typedef struct Struct_Obj_Entry {
 
 	void		*ehdr;
 
+	/* SysV Hash fields */
 	uint32_t        nbuckets;	/* Number of buckets */
 	uint32_t        nbuckets_m;	/* Precomputed for fast remainder */
 	uint8_t         nbuckets_s1;
 	uint8_t         nbuckets_s2;
+
+	/* GNU Hash fields */
+	const uint32_t *buckets_gnu;	/* Hash table buckets array */
+	uint32_t	nbuckets_gnu;	/* Number of GNU hash buckets */
+	uint32_t	nbuckets_m_gnu;	/* Precomputed for fast remainder */
+	uint8_t		nbuckets_s1_gnu;
+	uint8_t		nbuckets_s2_gnu;
+	const uint32_t *chains_gnu;	/* Hash table chain array */
+#define nchains_gnu	nchains		/* Number of symbols, shared with SysV Hash */
+	const Elf_Addr *bloom_gnu;
+	uint32_t	symndx_gnu;	/* First accessible symbol on dynsym table */
+	uint32_t	mask_bm_gnu;	/* Bloom filter words - 1 (bitmask) */
+	uint32_t	shift2_gnu;	/* Bloom filter shift count */
+
 	size_t		pathlen;	/* Pathname length */
 	SIMPLEQ_HEAD(, Struct_Name_Entry) names; /* List of names for this
 						  * object we know about. */

Index: src/libexec/ld.elf_so/symbol.c
diff -u src/libexec/ld.elf_so/symbol.c:1.71 src/libexec/ld.elf_so/symbol.c:1.72
--- src/libexec/ld.elf_so/symbol.c:1.71	Sat Feb 29 04:23:05 2020
+++ src/libexec/ld.elf_so/symbol.c	Sat Feb 29 04:24:33 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: symbol.c,v 1.71 2020/02/29 04:23:05 kamil Exp $	 */
+/*	$NetBSD: symbol.c,v 1.72 2020/02/29 04:24:33 kamil Exp $	 */
 
 /*
  * Copyright 1996 John D. Polstra.
@@ -40,7 +40,7 @@
 
 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: symbol.c,v 1.71 2020/02/29 04:23:05 kamil Exp $");
+__RCSID("$NetBSD: symbol.c,v 1.72 2020/02/29 04:24:33 kamil Exp $");
 #endif /* not lint */
 
 #include <err.h>
@@ -327,18 +327,17 @@ _rtld_symlook_obj_matched_symbol(const c
  * the given name.  Returns a pointer to the symbol, or NULL if no
  * definition was found.
  *
- * The symbol's hash value is passed in for efficiency reasons; that
- * eliminates many recomputations of the hash value.
+ * SysV Hash version.
  */
-const Elf_Sym *
-_rtld_symlook_obj(const char *name, Elf_Hash *hash,
+static const Elf_Sym *
+_rtld_symlook_obj_sysv(const char *name, unsigned long hash,
     const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry)
 {
 	unsigned long symnum;
 	const Elf_Sym *vsymp = NULL;
 	int vcount = 0;
 
-	for (symnum = obj->buckets[fast_remainder32(hash->sysv, obj->nbuckets,
+	for (symnum = obj->buckets[fast_remainder32(hash, obj->nbuckets,
 	     obj->nbuckets_m, obj->nbuckets_s1, obj->nbuckets_s2)];
 	     symnum != ELF_SYM_UNDEFINED;
 	     symnum = obj->chains[symnum]) {
@@ -355,6 +354,82 @@ _rtld_symlook_obj(const char *name, Elf_
 }
 
 /*
+ * Search the symbol table of a single shared object for a symbol of
+ * the given name.  Returns a pointer to the symbol, or NULL if no
+ * definition was found.
+ *
+ * GNU Hash version.
+ */
+static const Elf_Sym *
+_rtld_symlook_obj_gnu(const char *name, unsigned long hash,
+    const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry)
+{
+	unsigned long symnum;
+	const Elf_Sym *vsymp = NULL;
+	const Elf32_Word *hashval;
+	Elf_Addr bloom_word;
+	Elf32_Word bucket;
+	int vcount = 0;
+	unsigned int h1, h2;
+
+	/* Pick right bitmask word from Bloom filter array */
+	bloom_word = obj->bloom_gnu[(hash / ELFSIZE) & obj->mask_bm_gnu];
+
+	/* Calculate modulus word size of gnu hash and its derivative */
+	h1 = hash & (ELFSIZE - 1);
+	h2 = ((hash >> obj->shift2_gnu) & (ELFSIZE - 1));
+
+	/* Filter out the "definitely not in set" queries */
+	if (((bloom_word >> h1) & (bloom_word >> h2) & 1) == 0)
+		return NULL;
+
+	/* Locate hash chain and corresponding value element*/
+	bucket = obj->buckets_gnu[fast_remainder32(hash, obj->nbuckets_gnu,
+	     obj->nbuckets_m_gnu, obj->nbuckets_s1_gnu, obj->nbuckets_s2_gnu)];
+	if (bucket == 0)
+		return NULL;
+
+	hashval = &obj->chains_gnu[bucket];
+	do {
+		if (((*hashval ^ hash) >> 1) == 0) {
+			symnum = hashval - obj->chains_gnu;
+
+			if (_rtld_symlook_obj_matched_symbol(name, obj, flags,
+			    ventry, symnum, &vsymp, &vcount)) {
+				return vsymp;
+			}
+		}
+	} while ((*hashval++ & 1) == 0);
+	if (vcount == 1)
+		return vsymp;
+	return NULL;
+}
+
+/*
+ * Search the symbol table of a single shared object for a symbol of
+ * the given name.  Returns a pointer to the symbol, or NULL if no
+ * definition was found.
+ *
+ * The symbol's hash value is passed in for efficiency reasons; that
+ * eliminates many recomputations of the hash value.
+ *
+ * Redirect to either GNU Hash (whenever available) or ELF Hash.
+ */
+const Elf_Sym *
+_rtld_symlook_obj(const char *name, Elf_Hash *hash,
+    const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry)
+{
+
+	assert(obj->sysv_hash || obj->gnu_hash);
+
+	/* Always prefer the GNU Hash as it is faster. */
+	if (obj->gnu_hash)
+		return _rtld_symlook_obj_gnu(name, hash->gnu, obj, flags, ventry);
+	else
+		return _rtld_symlook_obj_sysv(name, hash->sysv, obj, flags, ventry);
+}
+
+/*
  * Given a symbol number in a referencing object, find the corresponding
  * definition of the symbol.  Returns a pointer to the symbol, or NULL if
  * no definition was found.  Returns a pointer to the Obj_Entry of the

Reply via email to