On 23/03/17 13:47, [email protected] wrote:
> From: Jeff Hostetler <[email protected]>
>
[snip]
>
> diff --git a/name-hash.c b/name-hash.c
> index 3f7722a..71ef07e 100644
> --- a/name-hash.c
> +++ b/name-hash.c
> @@ -23,15 +23,21 @@ static int dir_entry_cmp(const struct dir_entry *e1,
> name ? name : e2->name, e1->namelen);
> }
>
> -static struct dir_entry *find_dir_entry(struct index_state *istate,
> - const char *name, unsigned int namelen)
> +static struct dir_entry *find_dir_entry__hash(struct index_state *istate,
> + const char *name, unsigned int namelen, unsigned int hash)
> {
> struct dir_entry key;
> - hashmap_entry_init(&key, memihash(name, namelen));
> + hashmap_entry_init(&key, hash);
> key.namelen = namelen;
> return hashmap_get(&istate->dir_hash, &key, name);
> }
>
> +static struct dir_entry *find_dir_entry(struct index_state *istate,
> + const char *name, unsigned int namelen)
> +{
> + return find_dir_entry__hash(istate, name, namelen, memihash(name,
> namelen));
> +}
> +
> static struct dir_entry *hash_dir_entry(struct index_state *istate,
> struct cache_entry *ce, int namelen)
> {
> @@ -112,21 +118,493 @@ static int cache_entry_cmp(const struct cache_entry
> *ce1,
> return remove ? !(ce1 == ce2) : 0;
> }
>
> -static void lazy_init_name_hash(struct index_state *istate)
> +static int lazy_try_threaded = 1;
> +static int lazy_nr_dir_threads;
> +
> +#ifdef NO_PTHREADS
> +
> +static inline int lookup_lazy_params(struct index_state *istate)
> {
> - int nr;
> + return 0;
> +}
> +
> +static inline void threaded_lazy_init_name_hash(
> + struct index_state *istate)
> +{
> +}
> +
> +#else
> +
> +#include "thread-utils.h"
> +
> +/*
> + * Set a minimum number of cache_entries that we will handle per
> + * thread and use that to decide how many threads to run (upto
> + * the number on the system).
> + *
> + * For guidance setting the lower per-thread bound, see:
> + * t/helper/test-lazy-init-name-hash --analyze
> + */
> +#define LAZY_THREAD_COST (2000)
> +
> +/*
> + * We use n mutexes to guard n partitions of the "istate->dir_hash"
> + * hashtable. Since "find" and "insert" operations will hash to a
> + * particular bucket and modify/search a single chain, we can say
> + * that "all chains mod n" are guarded by the same mutex -- rather
> + * than having a single mutex to guard the entire table. (This does
> + * require that we disable "rehashing" on the hashtable.)
> + *
> + * So, a larger value here decreases the probability of a collision
> + * and the time that each thread must wait for the mutex.
> + */
> +#define LAZY_MAX_MUTEX (32)
> +
> +static pthread_mutex_t *lazy_dir_mutex_array;
> +
> +/*
> + * An array of lazy_entry items is used by the n threads in
> + * the directory parse (first) phase to (lock-free) store the
> + * intermediate results. These values are then referenced by
> + * the 2 threads in the second phase.
> + */
> +struct lazy_entry {
> + struct dir_entry *dir;
> + unsigned int hash_dir;
> + unsigned int hash_name;
> +};
> +
> +/*
> + * Decide if we want to use threads (if available) to load
> + * the hash tables. We set "lazy_nr_dir_threads" to zero when
> + * it is not worth it.
> + */
> +static int lookup_lazy_params(struct index_state *istate)
> +{
> + int nr_cpus;
> +
> + lazy_nr_dir_threads = 0;
> +
> + if (!lazy_try_threaded)
> + return 0;
> +
> + /*
> + * If we are respecting case, just use the original
> + * code to build the "istate->name_hash". We don't
> + * need the complexity here.
> + */
> + if (!ignore_case)
> + return 0;
> +
> + nr_cpus = online_cpus();
> + if (nr_cpus < 2)
> + return 0;
> +
> + if (istate->cache_nr < 2 * LAZY_THREAD_COST)
> + return 0;
> +
> + if (istate->cache_nr < nr_cpus * LAZY_THREAD_COST)
> + nr_cpus = istate->cache_nr / LAZY_THREAD_COST;
> + lazy_nr_dir_threads = nr_cpus;
> + return lazy_nr_dir_threads;
> +}
> +
> +/*
> + * Initialize n mutexes for use when searching and inserting
> + * into "istate->dir_hash". All "dir" threads are trying
> + * to insert partial pathnames into the hash as they iterate
> + * over their portions of the index, so lock contention is
> + * high.
> + *
> + * However, the hashmap is going to put items into bucket
> + * chains based on their hash values. Use that to create n
> + * mutexes and lock on mutex[bucket(hash) % n]. This will
> + * decrease the collision rate by (hopefully) by a factor of n.
> + */
> +static void init_dir_mutex(void)
> +{
> + int j;
> +
> + lazy_dir_mutex_array = xcalloc(LAZY_MAX_MUTEX, sizeof(pthread_mutex_t));
> +
> + for (j = 0; j < LAZY_MAX_MUTEX; j++)
> + init_recursive_mutex(&lazy_dir_mutex_array[j]);
> +}
> +
> +static void cleanup_dir_mutex(void)
> +{
> + int j;
> +
> + for (j = 0; j < LAZY_MAX_MUTEX; j++)
> + pthread_mutex_destroy(&lazy_dir_mutex_array[j]);
> +
> + free(lazy_dir_mutex_array);
> +}
> +
> +static void lock_dir_mutex(int j)
> +{
> + pthread_mutex_lock(&lazy_dir_mutex_array[j]);
> +}
> +
> +static void unlock_dir_mutex(int j)
> +{
> + pthread_mutex_unlock(&lazy_dir_mutex_array[j]);
> +}
> +
> +static inline int compute_dir_lock_nr(
> + const struct hashmap *map,
> + unsigned int hash)
> +{
> + return hashmap_bucket(map, hash) % LAZY_MAX_MUTEX;
> +}
> +
> +static struct dir_entry *hash_dir_entry_with_parent_and_prefix(
> + struct index_state *istate,
> + struct dir_entry *parent,
> + struct strbuf *prefix)
> +{
> + struct dir_entry *dir;
> + unsigned int hash;
> + int lock_nr;
> +
> + /*
> + * Either we have a parent directory and path with slash(es)
> + * or the directory is an immediate child of the root directory.
> + */
> + assert((parent != NULL) ^ (strchr(prefix->buf, '/') == 0));
sparse complains about 'Using plain integer as a NULL pointer', (the
return from strchr() is NULL, if '/' is not found) so maybe:
+ assert((parent != NULL) ^ (strchr(prefix->buf, '/') == NULL));
ATB,
Ramsay Jones