From: Dave Borowitz <dborow...@google.com>

find_copies_harder should only affect how unchanged entries are handled; modified files can still be considered the source of copies even when find_copies_harder is not set.
This makes the default rename detection behave the same as "git diff-tree -M -C" rather than just -M. Change-Id: I9cf4141b009e568cb2eb53cad062bca97cc4dcd8 --- NEWS | 3 +++ dulwich/diff_tree.py | 16 +++++++++------- dulwich/tests/test_diff_tree.py | 36 +++++++++++++++++++++--------------- 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/NEWS b/NEWS index 7a6efc6..5d953fb 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,9 @@ * New tree_changes_for_merge function in diff_tree. (Dave Borowitz) + * Easy rename detection in RenameDetector even without find_copies_harder. + (Dave Borowitz) + BUG FIXES * Avoid storing all objects in memory when writing pack. diff --git a/dulwich/diff_tree.py b/dulwich/diff_tree.py index eb0b152..2e6fa3b 100644 --- a/dulwich/diff_tree.py +++ b/dulwich/diff_tree.py @@ -437,10 +437,12 @@ class RenameDetector(object): elif self._should_split(change): self._deletes.append(TreeChange.delete(change.old)) self._adds.append(TreeChange.add(change.new)) - elif (self._find_copies_harder and ( - change.type == CHANGE_MODIFY or change.type == CHANGE_UNCHANGED)): - # Treat modified/unchanged as deleted rather than splitting it, - # to avoid spurious renames. + elif ((self._find_copies_harder and change.type == CHANGE_UNCHANGED) + or change.type == CHANGE_MODIFY): + # Treat all modifies as potential deletes for rename detection, + # but don't split them (to avoid spurious renames). Setting + # find_copies_harder means we treat unchanged the same as + # modified. self._deletes.append(change) else: self._changes.append(change) @@ -457,8 +459,7 @@ class RenameDetector(object): delete_map = defaultdict(list) for delete in self._deletes: # Keep track of whether the delete was actually marked as a delete. - # If not, it must have been added due to find_copies_harder, and - # needs to be marked as a copy. + # If not, it needs to be marked as a copy. 
is_delete = delete.type == CHANGE_DELETE delete_map[delete.old.sha].append((delete.old, is_delete)) @@ -469,7 +470,8 @@ class RenameDetector(object): for (old, is_delete), new in itertools.izip(sha_deletes, sha_adds): if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode): continue - delete_paths.add(old.path) + if is_delete: + delete_paths.add(old.path) add_paths.add(new.path) new_type = is_delete and CHANGE_RENAME or CHANGE_COPY self._changes.append(TreeChange(new_type, old, new)) diff --git a/dulwich/tests/test_diff_tree.py b/dulwich/tests/test_diff_tree.py index 02c86c4..0dc6182 100644 --- a/dulwich/tests/test_diff_tree.py +++ b/dulwich/tests/test_diff_tree.py @@ -617,6 +617,26 @@ class RenameDetectionTest(DiffTestCase): TreeChange(CHANGE_RENAME, ('b', F, blob.id), ('d', F, blob.id))], self.detect_renames(tree1, tree2)) + def test_exact_copy_modify(self): + blob1 = make_object(Blob, data='a\nb\nc\nd\n') + blob2 = make_object(Blob, data='a\nb\nc\ne\n') + tree1 = self.commit_tree([('a', blob1)]) + tree2 = self.commit_tree([('a', blob2), ('b', blob1)]) + self.assertEqual( + [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)), + TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob1.id))], + self.detect_renames(tree1, tree2)) + + def test_exact_copy_change_mode(self): + blob = make_object(Blob, data='a\nb\nc\nd\n') + tree1 = self.commit_tree([('a', blob)]) + tree2 = self.commit_tree([('a', blob, 0100755), ('b', blob)]) + self.assertEqual( + [TreeChange(CHANGE_MODIFY, ('a', F, blob.id), + ('a', 0100755, blob.id)), + TreeChange(CHANGE_COPY, ('a', F, blob.id), ('b', F, blob.id))], + self.detect_renames(tree1, tree2)) + def test_rename_threshold(self): blob1 = make_object(Blob, data='a\nb\nc\n') blob2 = make_object(Blob, data='a\nb\nd\n') @@ -766,7 +786,7 @@ class RenameDetectionTest(DiffTestCase): no_renames = [ TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)), - TreeChange.add(('b', F, blob2.id))] + TreeChange(CHANGE_COPY, ('a', F, 
blob1.id), ('b', F, blob2.id))] self.assertEqual( no_renames, self.detect_renames(tree1, tree2)) self.assertEqual( @@ -797,20 +817,6 @@ class RenameDetectionTest(DiffTestCase): [TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))], self.detect_renames(tree1, tree2, find_copies_harder=True)) - def test_find_copies_harder_modify(self): - blob1 = make_object(Blob, data='a\nb\nc\nd\n') - blob2 = make_object(Blob, data='a\nb\nc\ne\n') - tree1 = self.commit_tree([('a', blob1)]) - tree2 = self.commit_tree([('a', blob2), ('b', blob2)]) - self.assertEqual( - [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)), - TreeChange.add(('b', F, blob2.id))], - self.detect_renames(tree1, tree2)) - self.assertEqual( - [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)), - TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))], - self.detect_renames(tree1, tree2, find_copies_harder=True)) - def test_find_copies_harder_with_rewrites(self): blob_a1 = make_object(Blob, data='a\nb\nc\nd\n') blob_a2 = make_object(Blob, data='f\ng\nh\ni\n') -- 1.7.3.1 _______________________________________________ Mailing list: https://launchpad.net/~dulwich-users Post to : dulwich-users@lists.launchpad.net Unsubscribe : https://launchpad.net/~dulwich-users More help : https://help.launchpad.net/ListHelp