The find_exact_renames function currently only uses the hash table for
grouping, i.e.:

1. add sources
2. add destinations
3. iterate all buckets, per bucket:
4. split sources from destinations
5. iterate destinations, per destination:
6. iterate sources to find best match

This can be simplified by utilizing the lookup functionality of the hash
table, i.e.:

1. add sources
2. iterate destinations, per destination:
3. lookup sources matching the current destination
4. iterate sources to find best match

This saves several iterations and file_similarity allocations for the
destinations.

Signed-off-by: Karsten Blees <bl...@dcon.de>
---
 diffcore-rename.c | 75 +++++++++++++++----------------------------------------
 1 file changed, 20 insertions(+), 55 deletions(-)

diff --git a/diffcore-rename.c b/diffcore-rename.c
index 008a60c..82b7975 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -243,7 +243,7 @@ static int score_compare(const void *a_, const void *b_)
 }
 
 struct file_similarity {
-       int src_dst, index;
+       int index;
        struct diff_filespec *filespec;
        struct file_similarity *next;
 };
@@ -260,25 +260,21 @@ static unsigned int hash_filespec(struct diff_filespec *filespec)
        return hash;
 }
 
-static int find_identical_files(struct file_similarity *src,
-                               struct file_similarity *dst,
+static int find_identical_files(struct hash_table *srcs,
+                               int dst_index,
                                struct diff_options *options)
 {
        int renames = 0;
 
-       /*
-        * Walk over all the destinations ...
-        */
-       do {
-       struct diff_filespec *target = dst->filespec;
+       struct diff_filespec *target = rename_dst[dst_index].two;
        struct file_similarity *p, *best;
        int i = 100, best_score = -1;
 
        /*
-        * .. to find the best source match
+        * Find the best source match for specified destination.
         */
        best = NULL;
-       for (p = src; p; p = p->next) {
+       for (p = lookup_hash(hash_filespec(target), srcs); p; p = p->next) {
                int score;
                struct diff_filespec *source = p->filespec;
 
@@ -307,61 +303,28 @@ static int find_identical_files(struct file_similarity *src,
                        break;
        }
        if (best) {
-               record_rename_pair(dst->index, best->index, MAX_SCORE);
+               record_rename_pair(dst_index, best->index, MAX_SCORE);
                renames++;
        }
-       } while ((dst = dst->next) != NULL);
        return renames;
 }
 
-static void free_similarity_list(struct file_similarity *p)
+static int free_similarity_list(void *p, void *unused)
 {
        while (p) {
                struct file_similarity *entry = p;
-               p = p->next;
+               p = entry->next;
                free(entry);
        }
+       return 0;
 }
 
-static int find_same_files(void *ptr, void *data)
-{
-       int ret;
-       struct file_similarity *p = ptr;
-       struct file_similarity *src = NULL, *dst = NULL;
-       struct diff_options *options = data;
-
-       /* Split the hash list up into sources and destinations */
-       do {
-               struct file_similarity *entry = p;
-               p = p->next;
-               if (entry->src_dst < 0) {
-                       entry->next = src;
-                       src = entry;
-               } else {
-                       entry->next = dst;
-                       dst = entry;
-               }
-       } while (p);
-
-       /*
-        * If we have both sources *and* destinations, see if
-        * we can match them up
-        */
-       ret = (src && dst) ? find_identical_files(src, dst, options) : 0;
-
-       /* Free the hashes and return the number of renames found */
-       free_similarity_list(src);
-       free_similarity_list(dst);
-       return ret;
-}
-
-static void insert_file_table(struct hash_table *table, int src_dst, int 
index, struct diff_filespec *filespec)
+static void insert_file_table(struct hash_table *table, int index, struct 
diff_filespec *filespec)
 {
        void **pos;
        unsigned int hash;
        struct file_similarity *entry = xmalloc(sizeof(*entry));
 
-       entry->src_dst = src_dst;
        entry->index = index;
        entry->filespec = filespec;
        entry->next = NULL;
@@ -385,24 +348,26 @@ static void insert_file_table(struct hash_table *table, int src_dst, int index,
  */
 static int find_exact_renames(struct diff_options *options)
 {
-       int i;
+       int i, renames = 0;
        struct hash_table file_table;
 
+       /* Add all sources to the hash table */
        init_hash(&file_table);
-       preallocate_hash(&file_table, rename_src_nr + rename_dst_nr);
+       preallocate_hash(&file_table, rename_src_nr);
        for (i = 0; i < rename_src_nr; i++)
-               insert_file_table(&file_table, -1, i, rename_src[i].p->one);
+               insert_file_table(&file_table, i, rename_src[i].p->one);
 
+       /* Walk the destinations and find best source match */
        for (i = 0; i < rename_dst_nr; i++)
-               insert_file_table(&file_table, 1, i, rename_dst[i].two);
+               renames += find_identical_files(&file_table, i, options);
 
-       /* Find the renames */
-       i = for_each_hash(&file_table, find_same_files, options);
+       /* Free source file_similarity chains */
+       for_each_hash(&file_table, free_similarity_list, options);
 
        /* .. and free the hash data structure */
        free_hash(&file_table);
 
-       return i;
+       return renames;
 }
 
 #define NUM_CANDIDATE_PER_DST 4
-- 
1.8.4.11.g4f52745.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to