From: Dave Borowitz <dborow...@google.com> Change-Id: Idfc916a233d016df6f733941c19a8a34cdd3dad9 --- NEWS | 2 + dulwich/diff_tree.py | 85 +++++++++++++++++++++++++++++++ dulwich/tests/test_diff_tree.py | 105 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 192 insertions(+), 0 deletions(-)
diff --git a/NEWS b/NEWS index 7dba793..e00c9c5 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,8 @@ * New walk module with a Walker class for customizable commit walking. (Dave Borowitz) + * New tree_changes_for_merge function in diff_tree. (Dave Borowitz) + BUG FIXES * Avoid storing all objects in memory when writing pack. diff --git a/dulwich/diff_tree.py b/dulwich/diff_tree.py index affbe54..444355b 100644 --- a/dulwich/diff_tree.py +++ b/dulwich/diff_tree.py @@ -193,6 +193,91 @@ def tree_changes(store, tree1_id, tree2_id, want_unchanged=False): yield TreeChange(change_type, entry1, entry2) +def _all_eq(seq, key, value): + for e in seq: + if key(e) != value: + return False + return True + + +def _all_same(seq, key): + return _all_eq(seq[1:], key, key(seq[0])) + + +def _matches_any_parent(store, parent_tree_ids, changes): + have = [c for c in changes if c is not None] + assert have + new = have[0].new + + # Look in changes for parents we already have first. + for change in have: + if new.sha == change.old.sha: + return True + + # A change may be None if that path was unchanged, so we need to actually + # look up the SHA for that path in any parent trees. + # TODO: We could precompute these old_shas (e.g. by passing want_unchanged + # to tree_changes), but the assumption is that the cost of tree lookups due + # to conflicts is less than the savings we're getting by pruning identical + # subtrees. + missing = [p for p, c in zip(parent_tree_ids, changes) if c is None] + get = store.__getitem__ + for parent_tree_id in missing: + tree = get(parent_tree_id) + try: + _, old_sha = tree.lookup_path(get, new.path) + except KeyError: + continue + if new.sha == old_sha: + return True + return False + + +def tree_changes_for_merge(store, parent_tree_ids, tree_id): + """Get the tree changes for a merge tree relative to all its parents. + + :param store: An ObjectStore for looking up objects. + :param parent_tree_ids: An iterable of the SHAs of the parent trees. + :param tree_id: The SHA of the merge tree. + + :yield: Lists of TreeChange objects, one per conflicted path in the merge. + + Each list contains one element per parent, with the TreeChange for that + path relative to that parent. An element may be None if it never existed + in one parent and was deleted in two others. + + A path is only included in the output if it is a conflict, i.e. its SHA + in the merge tree is not found in any of the parents, or in the case of + deletes, if not all of the old SHAs match. + """ + all_parent_changes = [tree_changes(store, t, tree_id) + for t in parent_tree_ids] + num_parents = len(parent_tree_ids) + changes_by_path = defaultdict(lambda: [None] * num_parents) + + # Organize by path. + for i, parent_changes in enumerate(all_parent_changes): + for change in parent_changes: + if change.type == CHANGE_DELETE: + path = change.old.path + else: + path = change.new.path + changes_by_path[path][i] = change + + old_sha = lambda c: c.old.sha + change_type = lambda c: c.type + + # Yield only conflicting changes. + for _, changes in sorted(changes_by_path.iteritems()): + assert len(changes) == num_parents + have = [c for c in changes if c is not None] + if _all_eq(have, change_type, CHANGE_DELETE): + if not _all_same(have, old_sha): + yield changes + elif not _matches_any_parent(store, parent_tree_ids, changes): + yield changes + + _BLOCK_SIZE = 64 diff --git a/dulwich/tests/test_diff_tree.py b/dulwich/tests/test_diff_tree.py index d5fa5c4..aa544ac 100644 --- a/dulwich/tests/test_diff_tree.py +++ b/dulwich/tests/test_diff_tree.py @@ -27,6 +27,7 @@ from dulwich.diff_tree import ( _merge_entries, _merge_entries_py, tree_changes, + tree_changes_for_merge, _count_blocks, _count_blocks_py, _similarity_score, @@ -288,6 +289,110 @@ class TreeChangesTest(DiffTestCase): ('a', F, blob_a2.id))], tree1, tree2) + def assertChangesForMergeEqual(self, expected, parent_trees, merge_tree, + **kwargs): + parent_tree_ids = [t.id for t in parent_trees] + actual = list(tree_changes_for_merge( + self.store, parent_tree_ids, merge_tree.id, **kwargs)) + self.assertEqual(expected, actual) + + parent_tree_ids.reverse() + expected = [list(reversed(cs)) for cs in expected] + actual = list(tree_changes_for_merge( + self.store, parent_tree_ids, merge_tree.id, **kwargs)) + self.assertEqual(expected, actual) + + def test_tree_changes_for_merge_add_no_conflict(self): + blob = make_object(Blob, data='blob') + parent1 = self.commit_tree([]) + parent2 = merge = self.commit_tree([('a', blob)]) + self.assertChangesForMergeEqual([], [parent1, parent2], merge) + self.assertChangesForMergeEqual([], [parent2, parent2], merge) + + def test_tree_changes_for_merge_add_modify_conflict(self): + blob1 = make_object(Blob, data='1') + blob2 = make_object(Blob, data='2') + parent1 = self.commit_tree([]) + parent2 = self.commit_tree([('a', blob1)]) + merge = self.commit_tree([('a', blob2)]) + self.assertChangesForMergeEqual( + [[TreeChange.add(('a', F, blob2.id)), + TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id))]], + [parent1, parent2], merge) + + def test_tree_changes_for_merge_modify_modify_conflict(self): + blob1 = make_object(Blob, data='1') + blob2 = make_object(Blob, data='2') + blob3 = make_object(Blob, data='3') + parent1 = self.commit_tree([('a', blob1)]) + parent2 = self.commit_tree([('a', blob2)]) + merge = self.commit_tree([('a', blob3)]) + self.assertChangesForMergeEqual( + [[TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)), + TreeChange(CHANGE_MODIFY, ('a', F, blob2.id), ('a', F, blob3.id))]], + [parent1, parent2], merge) + + def test_tree_changes_for_merge_modify_no_conflict(self): + blob1 = make_object(Blob, data='1') + blob2 = make_object(Blob, data='2') + parent1 = self.commit_tree([('a', blob1)]) + parent2 = merge = self.commit_tree([('a', blob2)]) + self.assertChangesForMergeEqual([], [parent1, parent2], merge) + + def test_tree_changes_for_merge_delete_delete_conflict(self): + blob1 = make_object(Blob, data='1') + blob2 = make_object(Blob, data='2') + parent1 = self.commit_tree([('a', blob1)]) + parent2 = self.commit_tree([('a', blob2)]) + merge = self.commit_tree([]) + self.assertChangesForMergeEqual( + [[TreeChange.delete(('a', F, blob1.id)), + TreeChange.delete(('a', F, blob2.id))]], + [parent1, parent2], merge) + + def test_tree_changes_for_merge_delete_no_conflict(self): + blob = make_object(Blob, data='blob') + has = self.commit_tree([('a', blob)]) + doesnt_have = self.commit_tree([]) + self.assertChangesForMergeEqual([], [has, has], doesnt_have) + self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have) + + def test_tree_changes_for_merge_octopus_no_conflict(self): + r = range(5) + blobs = [make_object(Blob, data=str(i)) for i in r] + parents = [self.commit_tree([('a', blobs[i])]) for i in r] + for i in r: + # Take the SHA from each of the parents. + self.assertChangesForMergeEqual([], parents, parents[i]) + + def test_tree_changes_for_merge_octopus_modify_conflict(self): + # Because the octopus merge strategy is limited, I doubt it's possible + # to create this with the git command line. But the output is well- + # defined, so test it anyway. + r = range(5) + parent_blobs = [make_object(Blob, data=str(i)) for i in r] + merge_blob = make_object(Blob, data='merge') + parents = [self.commit_tree([('a', parent_blobs[i])]) for i in r] + merge = self.commit_tree([('a', merge_blob)]) + expected = [[TreeChange(CHANGE_MODIFY, ('a', F, parent_blobs[i].id), + ('a', F, merge_blob.id)) for i in r]] + self.assertChangesForMergeEqual(expected, parents, merge) + + def test_tree_changes_for_merge_octopus_delete(self): + blob1 = make_object(Blob, data='1') + blob2 = make_object(Blob, data='3') + parent1 = self.commit_tree([('a', blob1)]) + parent2 = self.commit_tree([('a', blob2)]) + parent3 = merge = self.commit_tree([]) + self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge) + self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge) + self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge) + self.assertChangesForMergeEqual( + [[TreeChange.delete(('a', F, blob1.id)), + TreeChange.delete(('a', F, blob2.id)), + None]], + [parent1, parent2, parent3], merge) + class RenameDetectionTest(DiffTestCase): -- 1.7.3.1 _______________________________________________ Mailing list: https://launchpad.net/~dulwich-users Post to : dulwich-users@lists.launchpad.net Unsubscribe : https://launchpad.net/~dulwich-users More help : https://help.launchpad.net/ListHelp