Re: [PATCH v3 21/33] merge-recursive: add get_directory_renames()

2017-11-25 Thread Elijah Newren
On Sat, Nov 25, 2017 at 4:52 PM, Johannes Schindelin
 wrote:
> On Tue, 21 Nov 2017, Elijah Newren wrote:
>
>> diff --git a/merge-recursive.c b/merge-recursive.c

>> + if (old_len != new_len || strncmp(old_path, new_path, old_len)) {
>> + *old_dir = strndup(old_path, old_len);
>> + *new_dir = strndup(new_path, new_len);
>
> These two callers of strndup() are the only ones in Git's code base now.
> It is also causing a compile error on Windows.
>
> Any reason you did not use xstrndup() here?
>
> Ciao,
> Dscho

Nope, was just unaware.  I'll go ahead and switch them over for my
next roll of the series.  Sorry for the pain.


Re: [PATCH v3 21/33] merge-recursive: add get_directory_renames()

2017-11-25 Thread Johannes Schindelin
Hi Elijah,

On Tue, 21 Nov 2017, Elijah Newren wrote:

> diff --git a/merge-recursive.c b/merge-recursive.c
> index 2f4f85314a..6a0a6d4366 100644
> --- a/merge-recursive.c
> +++ b/merge-recursive.c
> @@ -1384,6 +1384,132 @@ static struct diff_queue_struct *get_diffpairs(struct 
> merge_options *o,
>   return ret;
>  }
>  
> +static void get_renamed_dir_portion(const char *old_path, const char 
> *new_path,
> + char **old_dir, char **new_dir)
> +{
> + char *end_of_old, *end_of_new;
> + int old_len, new_len;
> +
> + *old_dir = NULL;
> + *new_dir = NULL;
> +
> + /* For
> +  *"a/b/c/d/foo.c" -> "a/b/something-else/d/foo.c"
> +  * the "d/foo.c" part is the same, we just want to know that
> +  *"a/b/c" was renamed to "a/b/something-else"
> +  * so, for this example, this function returns "a/b/c" in
> +  * *old_dir and "a/b/something-else" in *new_dir.
> +  *
> +  * Also, if the basename of the file changed, we don't care.  We
> +  * want to know which portion of the directory, if any, changed.
> +  */
> + end_of_old = strrchr(old_path, '/');
> + end_of_new = strrchr(new_path, '/');
> +
> + if (end_of_old == NULL || end_of_new == NULL)
> + return;
> + while (*--end_of_new == *--end_of_old &&
> +end_of_old != old_path &&
> +end_of_new != new_path)
> + ; /* Do nothing; all in the while loop */
> + /*
> +  * We've found the first non-matching character in the directory
> +  * paths.  That means the current directory we were comparing
> +  * represents the rename.  Move end_of_old and end_of_new back
> +  * to the full directory name.
> +  */
> + if (*end_of_old == '/')
> + end_of_old++;
> + if (*end_of_old != '/')
> + end_of_new++;
> + end_of_old = strchr(end_of_old, '/');
> + end_of_new = strchr(end_of_new, '/');
> +
> + /*
> +  * It may have been the case that old_path and new_path were the same
> +  * directory all along.  Don't claim a rename if they're the same.
> +  */
> + old_len = end_of_old - old_path;
> + new_len = end_of_new - new_path;
> +
> + if (old_len != new_len || strncmp(old_path, new_path, old_len)) {
> + *old_dir = strndup(old_path, old_len);
> + *new_dir = strndup(new_path, new_len);

These two callers of strndup() are the only ones in Git's code base now.
It is also causing a compile error on Windows.

Any reason you did not use xstrndup() here?

Ciao,
Dscho


[PATCH v3 21/33] merge-recursive: add get_directory_renames()

2017-11-21 Thread Elijah Newren
This populates a list of directory renames for us.  The list of
directory renames is not yet used, but will be in subsequent commits.

Signed-off-by: Elijah Newren 
---
 merge-recursive.c | 152 ++
 1 file changed, 152 insertions(+)

diff --git a/merge-recursive.c b/merge-recursive.c
index 2f4f85314a..6a0a6d4366 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -1384,6 +1384,132 @@ static struct diff_queue_struct *get_diffpairs(struct 
merge_options *o,
return ret;
 }
 
+static void get_renamed_dir_portion(const char *old_path, const char *new_path,
+   char **old_dir, char **new_dir)
+{
+   char *end_of_old, *end_of_new;
+   int old_len, new_len;
+
+   *old_dir = NULL;
+   *new_dir = NULL;
+
+   /* For
+*"a/b/c/d/foo.c" -> "a/b/something-else/d/foo.c"
+* the "d/foo.c" part is the same, we just want to know that
+*"a/b/c" was renamed to "a/b/something-else"
+* so, for this example, this function returns "a/b/c" in
+* *old_dir and "a/b/something-else" in *new_dir.
+*
+* Also, if the basename of the file changed, we don't care.  We
+* want to know which portion of the directory, if any, changed.
+*/
+   end_of_old = strrchr(old_path, '/');
+   end_of_new = strrchr(new_path, '/');
+
+   if (end_of_old == NULL || end_of_new == NULL)
+   return;
+   while (*--end_of_new == *--end_of_old &&
+  end_of_old != old_path &&
+  end_of_new != new_path)
+   ; /* Do nothing; all in the while loop */
+   /*
+* We've found the first non-matching character in the directory
+* paths.  That means the current directory we were comparing
+* represents the rename.  Move end_of_old and end_of_new back
+* to the full directory name.
+*/
+   if (*end_of_old == '/')
+   end_of_old++;
+   if (*end_of_old != '/')
+   end_of_new++;
+   end_of_old = strchr(end_of_old, '/');
+   end_of_new = strchr(end_of_new, '/');
+
+   /*
+* It may have been the case that old_path and new_path were the same
+* directory all along.  Don't claim a rename if they're the same.
+*/
+   old_len = end_of_old - old_path;
+   new_len = end_of_new - new_path;
+
+   if (old_len != new_len || strncmp(old_path, new_path, old_len)) {
+   *old_dir = strndup(old_path, old_len);
+   *new_dir = strndup(new_path, new_len);
+   }
+}
+
+static struct hashmap *get_directory_renames(struct diff_queue_struct *pairs,
+struct tree *tree)
+{
+   struct hashmap *dir_renames;
+   struct hashmap_iter iter;
+   struct dir_rename_entry *entry;
+   int i;
+
+   dir_renames = malloc(sizeof(struct hashmap));
+   dir_rename_init(dir_renames);
+   for (i = 0; i < pairs->nr; ++i) {
+   struct string_list_item *item;
+   int *count;
+   struct diff_filepair *pair = pairs->queue[i];
+   char *old_dir, *new_dir;
+
+   get_renamed_dir_portion(pair->one->path, pair->two->path,
+   _dir,_dir);
+   if (!old_dir)
+   /* Directory didn't change at all; ignore this one. */
+   continue;
+
+   entry = dir_rename_find_entry(dir_renames, old_dir);
+   if (!entry) {
+   entry = xmalloc(sizeof(struct dir_rename_entry));
+   dir_rename_entry_init(entry, old_dir);
+   hashmap_put(dir_renames, entry);
+   } else {
+   free(old_dir);
+   }
+   item = string_list_lookup(>possible_new_dirs, new_dir);
+   if (!item) {
+   item = string_list_insert(>possible_new_dirs,
+ new_dir);
+   item->util = xcalloc(1, sizeof(int));
+   } else {
+   free(new_dir);
+   }
+   count = item->util;
+   *count += 1;
+   }
+
+   hashmap_iter_init(dir_renames, );
+   while ((entry = hashmap_iter_next())) {
+   int max = 0;
+   int bad_max = 0;
+   char *best = NULL;
+
+   for (i = 0; i < entry->possible_new_dirs.nr; i++) {
+   int *count = entry->possible_new_dirs.items[i].util;
+
+   if (*count == max)
+   bad_max = max;
+   else if (*count > max) {
+   max = *count;
+   best = entry->possible_new_dirs.items[i].string;
+   }
+   }
+